2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
28 * See Copyright for the status of this software.
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
39 #define XML_DIR_SEP '/'
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
70 #ifdef HAVE_SYS_STAT_H
86 * arbitrary depth limit for the XML documents that we allow to
87 * process. This is not a limitation of the parser but a safety
90 unsigned int xmlParserMaxDepth = 1024;
94 #define XML_PARSER_BIG_BUFFER_SIZE 300
95 #define XML_PARSER_BUFFER_SIZE 100
97 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
100 * List of XML prefixed PI allowed by W3C specs
103 static const char *xmlW3CPIs[] = {
109 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
110 xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
113 static xmlParserErrors
114 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
116 void *user_data, int depth, const xmlChar *URL,
117 const xmlChar *ID, xmlNodePtr *list);
119 #ifdef LIBXML_LEGACY_ENABLED
121 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
123 #endif /* LIBXML_LEGACY_ENABLED */
125 static xmlParserErrors
126 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
129 /************************************************************************
131 * Some factorized error routines *
133 ************************************************************************/
136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
141 * Handle a redefinition of attribute error
144 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
179 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
187 case XML_ERR_INVALID_HEX_CHARREF:
188 errmsg = "CharRef: invalid hexadecimal value\n";
190 case XML_ERR_INVALID_DEC_CHARREF:
191 errmsg = "CharRef: invalid decimal value\n";
193 case XML_ERR_INVALID_CHARREF:
194 errmsg = "CharRef: invalid value\n";
196 case XML_ERR_INTERNAL_ERROR:
197 errmsg = "internal error";
199 case XML_ERR_PEREF_AT_EOF:
200 errmsg = "PEReference at end of document\n";
202 case XML_ERR_PEREF_IN_PROLOG:
203 errmsg = "PEReference in prolog\n";
205 case XML_ERR_PEREF_IN_EPILOG:
206 errmsg = "PEReference in epilog\n";
208 case XML_ERR_PEREF_NO_NAME:
209 errmsg = "PEReference: no name\n";
211 case XML_ERR_PEREF_SEMICOL_MISSING:
212 errmsg = "PEReference: expecting ';'\n";
214 case XML_ERR_ENTITY_LOOP:
215 errmsg = "Detected an entity reference loop\n";
217 case XML_ERR_ENTITY_NOT_STARTED:
218 errmsg = "EntityValue: \" or ' expected\n";
220 case XML_ERR_ENTITY_PE_INTERNAL:
221 errmsg = "PEReferences forbidden in internal subset\n";
223 case XML_ERR_ENTITY_NOT_FINISHED:
224 errmsg = "EntityValue: \" or ' expected\n";
226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
227 errmsg = "AttValue: \" or ' expected\n";
229 case XML_ERR_LT_IN_ATTRIBUTE:
230 errmsg = "Unescaped '<' not allowed in attributes values\n";
232 case XML_ERR_LITERAL_NOT_STARTED:
233 errmsg = "SystemLiteral \" or ' expected\n";
235 case XML_ERR_LITERAL_NOT_FINISHED:
236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
238 case XML_ERR_MISPLACED_CDATA_END:
239 errmsg = "Sequence ']]>' not allowed in content\n";
241 case XML_ERR_URI_REQUIRED:
242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
244 case XML_ERR_PUBID_REQUIRED:
245 errmsg = "PUBLIC, the Public Identifier is missing\n";
247 case XML_ERR_HYPHEN_IN_COMMENT:
248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
250 case XML_ERR_PI_NOT_STARTED:
251 errmsg = "xmlParsePI : no target name\n";
253 case XML_ERR_RESERVED_XML_NAME:
254 errmsg = "Invalid PI name\n";
256 case XML_ERR_NOTATION_NOT_STARTED:
257 errmsg = "NOTATION: Name expected here\n";
259 case XML_ERR_NOTATION_NOT_FINISHED:
260 errmsg = "'>' required to close NOTATION declaration\n";
262 case XML_ERR_VALUE_REQUIRED:
263 errmsg = "Entity value required\n";
265 case XML_ERR_URI_FRAGMENT:
266 errmsg = "Fragment not allowed";
268 case XML_ERR_ATTLIST_NOT_STARTED:
269 errmsg = "'(' required to start ATTLIST enumeration\n";
271 case XML_ERR_NMTOKEN_REQUIRED:
272 errmsg = "NmToken expected in ATTLIST enumeration\n";
274 case XML_ERR_ATTLIST_NOT_FINISHED:
275 errmsg = "')' required to finish ATTLIST enumeration\n";
277 case XML_ERR_MIXED_NOT_STARTED:
278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
280 case XML_ERR_PCDATA_REQUIRED:
281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
284 errmsg = "ContentDecl : Name or '(' expected\n";
286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
289 case XML_ERR_PEREF_IN_INT_SUBSET:
291 "PEReference: forbidden within markup decl in internal subset\n";
293 case XML_ERR_GT_REQUIRED:
294 errmsg = "expected '>'\n";
296 case XML_ERR_CONDSEC_INVALID:
297 errmsg = "XML conditional section '[' expected\n";
299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
300 errmsg = "Content error in the external subset\n";
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
304 "conditional section INCLUDE or IGNORE keyword expected\n";
306 case XML_ERR_CONDSEC_NOT_FINISHED:
307 errmsg = "XML conditional section not closed\n";
309 case XML_ERR_XMLDECL_NOT_STARTED:
310 errmsg = "Text declaration '<?xml' required\n";
312 case XML_ERR_XMLDECL_NOT_FINISHED:
313 errmsg = "parsing XML declaration: '?>' expected\n";
315 case XML_ERR_EXT_ENTITY_STANDALONE:
316 errmsg = "external parsed entities cannot be standalone\n";
318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
319 errmsg = "EntityRef: expecting ';'\n";
321 case XML_ERR_DOCTYPE_NOT_FINISHED:
322 errmsg = "DOCTYPE improperly terminated\n";
324 case XML_ERR_LTSLASH_REQUIRED:
325 errmsg = "EndTag: '</' not found\n";
327 case XML_ERR_EQUAL_REQUIRED:
328 errmsg = "expected '='\n";
330 case XML_ERR_STRING_NOT_CLOSED:
331 errmsg = "String not closed expecting \" or '\n";
333 case XML_ERR_STRING_NOT_STARTED:
334 errmsg = "String not started expecting ' or \"\n";
336 case XML_ERR_ENCODING_NAME:
337 errmsg = "Invalid XML encoding name\n";
339 case XML_ERR_STANDALONE_VALUE:
340 errmsg = "standalone accepts only 'yes' or 'no'\n";
342 case XML_ERR_DOCUMENT_EMPTY:
343 errmsg = "Document is empty\n";
345 case XML_ERR_DOCUMENT_END:
346 errmsg = "Extra content at the end of the document\n";
348 case XML_ERR_NOT_WELL_BALANCED:
349 errmsg = "chunk is not well balanced\n";
351 case XML_ERR_EXTRA_CONTENT:
352 errmsg = "extra content at the end of well balanced chunk\n";
354 case XML_ERR_VERSION_MISSING:
355 errmsg = "Malformed declaration expecting version\n";
363 errmsg = "Unregistered error message\n";
367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
386 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
414 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
417 xmlStructuredErrorFunc schannel = NULL;
419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
426 (ctxt->sax) ? ctxt->sax->warning : NULL,
428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
441 * Handle a validity error.
444 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
447 xmlStructuredErrorFunc schannel = NULL;
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
457 __xmlRaiseError(schannel,
458 ctxt->vctxt.error, ctxt->vctxt.userData,
459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
462 msg, (const char *) str1);
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
478 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
479 const char *msg, int val)
481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
486 __xmlRaiseError(NULL, NULL, NULL,
487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
508 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
517 __xmlRaiseError(NULL, NULL, NULL,
518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
538 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
539 const char *msg, const xmlChar * val)
541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
564 * Handle a non fatal parser error
567 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
583 * @ctxt: an XML parser context
584 * @error: the error number
586 * @info1: extra information string
587 * @info2: extra information string
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
607 ctxt->nsWellFormed = 0;
610 /************************************************************************
612 * Library wide options *
614 ************************************************************************/
618 * @feature: the feature to be examined
620 * Examines if the library has been compiled with a given feature.
622 * Returns a non-zero value if the feature exist, otherwise zero.
623 * Returns zero (0) if the feature does not exist or an unknown
624 * unknown feature is requested, non-zero otherwise.
627 xmlHasFeature(xmlFeature feature)
630 case XML_WITH_THREAD:
631 #ifdef LIBXML_THREAD_ENABLED
637 #ifdef LIBXML_TREE_ENABLED
642 case XML_WITH_OUTPUT:
643 #ifdef LIBXML_OUTPUT_ENABLED
649 #ifdef LIBXML_PUSH_ENABLED
654 case XML_WITH_READER:
655 #ifdef LIBXML_READER_ENABLED
660 case XML_WITH_PATTERN:
661 #ifdef LIBXML_PATTERN_ENABLED
666 case XML_WITH_WRITER:
667 #ifdef LIBXML_WRITER_ENABLED
673 #ifdef LIBXML_SAX1_ENABLED
679 #ifdef LIBXML_FTP_ENABLED
685 #ifdef LIBXML_HTTP_ENABLED
691 #ifdef LIBXML_VALID_ENABLED
697 #ifdef LIBXML_HTML_ENABLED
702 case XML_WITH_LEGACY:
703 #ifdef LIBXML_LEGACY_ENABLED
709 #ifdef LIBXML_C14N_ENABLED
714 case XML_WITH_CATALOG:
715 #ifdef LIBXML_CATALOG_ENABLED
721 #ifdef LIBXML_XPATH_ENABLED
727 #ifdef LIBXML_XPTR_ENABLED
732 case XML_WITH_XINCLUDE:
733 #ifdef LIBXML_XINCLUDE_ENABLED
739 #ifdef LIBXML_ICONV_ENABLED
744 case XML_WITH_ISO8859X:
745 #ifdef LIBXML_ISO8859X_ENABLED
750 case XML_WITH_UNICODE:
751 #ifdef LIBXML_UNICODE_ENABLED
756 case XML_WITH_REGEXP:
757 #ifdef LIBXML_REGEXP_ENABLED
762 case XML_WITH_AUTOMATA:
763 #ifdef LIBXML_AUTOMATA_ENABLED
769 #ifdef LIBXML_EXPR_ENABLED
774 case XML_WITH_SCHEMAS:
775 #ifdef LIBXML_SCHEMAS_ENABLED
780 case XML_WITH_SCHEMATRON:
781 #ifdef LIBXML_SCHEMATRON_ENABLED
786 case XML_WITH_MODULES:
787 #ifdef LIBXML_MODULES_ENABLED
793 #ifdef LIBXML_DEBUG_ENABLED
798 case XML_WITH_DEBUG_MEM:
799 #ifdef DEBUG_MEMORY_LOCATION
804 case XML_WITH_DEBUG_RUN:
805 #ifdef LIBXML_DEBUG_RUNTIME
811 #ifdef LIBXML_ZLIB_ENABLED
822 /************************************************************************
824 * SAX2 defaulted attributes handling *
826 ************************************************************************/
830 * @ctxt: an XML parser context
832 * Do the SAX2 detection and specific intialization
835 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
837 #ifdef LIBXML_SAX1_ENABLED
838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
843 #endif /* LIBXML_SAX1_ENABLED */
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
854 typedef struct _xmlDefAttrs xmlDefAttrs;
855 typedef xmlDefAttrs *xmlDefAttrsPtr;
856 struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
869 * Add a defaulted attribute for an element
872 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
879 const xmlChar *prefix;
881 if (ctxt->attsDefault == NULL) {
882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
883 if (ctxt->attsDefault == NULL)
888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
891 name = xmlSplitQName3(fullname, &len);
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
901 * make sure there is some storage
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
906 (4 * 4) * sizeof(const xmlChar *));
907 if (defaults == NULL)
909 defaults->nbAttrs = 0;
910 defaults->maxAttrs = 4;
911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
925 * Split the element name into prefix:localname , the string found
926 * are within the DTD and hen not associated to namespace names.
928 name = xmlSplitQName3(fullattr, &len);
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
949 xmlErrMemory(ctxt, NULL);
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
960 * Register that this attribute is not CDATA
963 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
968 if (ctxt->attsSpecial == NULL) {
969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
970 if (ctxt->attsSpecial == NULL)
974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
979 xmlErrMemory(ctxt, NULL);
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
987 * Checks that the value conforms to the LanguageID production:
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
999 * Returns 1 if correct 0 otherwise
1002 xmlCheckLanguageID(const xmlChar * lang)
1004 const xmlChar *cur = lang;
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1039 while (cur[0] != 0) { /* non input consuming */
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1055 /************************************************************************
1057 * Parser stacks related functions and macros *
1059 ************************************************************************/
1061 xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1071 * Pushes a new parser namespace on top of the ns stack
1073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
1077 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1084 if (ctxt->nsTab[i + 1] == URL)
1086 /* out of scope keep it */
1091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
1097 xmlErrMemory(ctxt, NULL);
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1103 ctxt->nsTab = (const xmlChar **)
1104 xmlRealloc((char *) ctxt->nsTab,
1105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
1107 xmlErrMemory(ctxt, NULL);
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1123 * Returns the number of namespaces removed
1126 nsPop(xmlParserCtxtPtr ctxt, int nr)
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1135 if (ctxt->nsNr <= 0)
1138 for (i = 0;i < nr;i++) {
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1147 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
1152 if (ctxt->atts == NULL) {
1153 maxatts = 55; /* allow for 10 attrs by default */
1154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
1156 if (atts == NULL) goto mem_error;
1158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
1161 ctxt->maxatts = maxatts;
1162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
1164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
1166 if (atts == NULL) goto mem_error;
1168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
1172 ctxt->maxatts = maxatts;
1174 return(ctxt->maxatts);
1176 xmlErrMemory(ctxt, NULL);
1182 * @ctxt: an XML parser context
1183 * @value: the parser input
1185 * Pushes a new parser input on top of the input stack
1187 * Returns 0 in case of error, the index in the stack otherwise
1190 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1192 if ((ctxt == NULL) || (value == NULL))
1194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
1201 xmlErrMemory(ctxt, NULL);
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1211 * @ctxt: an XML parser context
1213 * Pops the top parser input from the input stack
1215 * Returns the input just removed
1218 inputPop(xmlParserCtxtPtr ctxt)
1220 xmlParserInputPtr ret;
1224 if (ctxt->inputNr <= 0)
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1231 ret = ctxt->inputTab[ctxt->inputNr];
1232 ctxt->inputTab[ctxt->inputNr] = NULL;
1237 * @ctxt: an XML parser context
1238 * @value: the element node
1240 * Pushes a new element node on top of the node stack
1242 * Returns 0 in case of error, the index in the stack otherwise
1245 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1247 if (ctxt == NULL) return(0);
1248 if (ctxt->nodeNr >= ctxt->nodeMax) {
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1253 sizeof(ctxt->nodeTab[0]));
1255 xmlErrMemory(ctxt, NULL);
1258 ctxt->nodeTab = tmp;
1261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
1262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1265 ctxt->instate = XML_PARSER_EOF;
1268 ctxt->nodeTab[ctxt->nodeNr] = value;
1270 return (ctxt->nodeNr++);
1274 * @ctxt: an XML parser context
1276 * Pops the top element node from the node stack
1278 * Returns the node just removed
1281 nodePop(xmlParserCtxtPtr ctxt)
1285 if (ctxt == NULL) return(NULL);
1286 if (ctxt->nodeNr <= 0)
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
1294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1298 #ifdef LIBXML_PUSH_ENABLED
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1306 * Pushes a new element name/prefix/URL on top of the name stack
1308 * Returns -1 in case of error, the index in the stack otherwise
1311 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1320 sizeof(ctxt->nameTab[0]));
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1328 sizeof(ctxt->pushTab[0]));
1333 ctxt->pushTab = tmp2;
1335 ctxt->nameTab[ctxt->nameNr] = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1340 return (ctxt->nameNr++);
1342 xmlErrMemory(ctxt, NULL);
1347 * @ctxt: an XML parser context
1349 * Pops the top element/prefix/URI name from the name stack
1351 * Returns the name just removed
1353 static const xmlChar *
1354 nameNsPop(xmlParserCtxtPtr ctxt)
1358 if (ctxt->nameNr <= 0)
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1369 #endif /* LIBXML_PUSH_ENABLED */
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1376 * Pushes a new element name on top of the name stack
1378 * Returns -1 in case of error, the index in the stack otherwise
1381 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1383 if (ctxt == NULL) return (-1);
1385 if (ctxt->nameNr >= ctxt->nameMax) {
1386 const xmlChar * *tmp;
1388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1390 sizeof(ctxt->nameTab[0]));
1395 ctxt->nameTab = tmp;
1397 ctxt->nameTab[ctxt->nameNr] = value;
1399 return (ctxt->nameNr++);
1401 xmlErrMemory(ctxt, NULL);
1406 * @ctxt: an XML parser context
1408 * Pops the top element name from the name stack
1410 * Returns the name just removed
1413 namePop(xmlParserCtxtPtr ctxt)
1417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1424 ret = ctxt->nameTab[ctxt->nameNr];
1425 ctxt->nameTab[ctxt->nameNr] = NULL;
1429 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
1435 xmlErrMemory(ctxt, NULL);
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1444 static int spacePop(xmlParserCtxtPtr ctxt) {
1446 if (ctxt->spaceNr <= 0) return(0);
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1451 ctxt->space = &ctxt->spaceTab[0];
1452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1458 * Macros for accessing the content. Those should be used only by the parser,
1461 * Dirty macros, i.e. one often need to make assumption on the context to
1464 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1465 * To be used with extreme caution since operations consuming
1466 * characters may move the input buffer to a different location !
1467 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1468 * This should be used internally by the parser
1469 * only to compare to ASCII values otherwise it would break when
1470 * running with UTF-8 encoding.
1471 * RAW same as CUR but in the input buffer, bypass any token
1472 * extraction that may have been done
1473 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1474 * to compare on ASCII based substring.
1475 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1476 * strings without newlines within the parser.
1477 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1478 * defined char within the parser.
1479 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1481 * NEXT Skip to the next character, this does the proper decoding
1482 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1483 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1484 * CUR_CHAR(l) returns the current unicode character (int), set l
1485 * to the number of xmlChars used for the encoding [0-5].
1486 * CUR_SCHAR same but operate on a string instead of the context
1487 * COPY_BUF copy the current unicode char to the target buffer, increment
1489 * GROW, SHRINK handling of input buffers
1492 #define RAW (*ctxt->input->cur)
1493 #define CUR (*ctxt->input->cur)
1494 #define NXT(val) ctxt->input->cur[(val)]
1495 #define CUR_PTR ctxt->input->cur
1497 #define CMP4( s, c1, c2, c3, c4 ) \
1498 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
1499 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
1500 #define CMP5( s, c1, c2, c3, c4, c5 ) \
1501 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
1502 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
1503 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
1504 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
1505 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
1506 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
1507 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
1508 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
1509 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
1510 ((unsigned char *) s)[ 8 ] == c9 )
1511 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
1512 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
1513 ((unsigned char *) s)[ 9 ] == c10 )
1515 #define SKIP(val) do { \
1516 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
1517 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1518 if ((*ctxt->input->cur == 0) && \
1519 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1520 xmlPopInput(ctxt); \
1523 #define SKIPL(val) do { \
1525 for(skipl=0; skipl<val; skipl++) { \
1526 if (*(ctxt->input->cur) == '\n') { \
1527 ctxt->input->line++; ctxt->input->col = 1; \
1528 } else ctxt->input->col++; \
1530 ctxt->input->cur++; \
1532 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1533 if ((*ctxt->input->cur == 0) && \
1534 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1535 xmlPopInput(ctxt); \
1538 #define SHRINK if ((ctxt->progressive == 0) && \
1539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1543 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1550 #define GROW if ((ctxt->progressive == 0) && \
1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1554 static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1561 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1563 #define NEXT xmlNextChar(ctxt)
1566 ctxt->input->col++; \
1567 ctxt->input->cur++; \
1569 if (*ctxt->input->cur == 0) \
1570 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1573 #define NEXTL(l) do { \
1574 if (*(ctxt->input->cur) == '\n') { \
1575 ctxt->input->line++; ctxt->input->col = 1; \
1576 } else ctxt->input->col++; \
1577 ctxt->input->cur += l; \
1578 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1581 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1582 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1584 #define COPY_BUF(l,b,i,v) \
1585 if (l == 1) b[i++] = (xmlChar) v; \
1586 else i += xmlCopyCharMultiByte(&b[i],v)
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1592 * skip all blank characters found at that point in the input stream.
1593 * It pops up finished entities in the process if allowable at that point.
1595 * Returns the number of space chars skipped
1599 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1609 * if we are in the document content, go really fast
1611 cur = ctxt->input->cur;
1612 while (IS_BLANK_CH(*cur)) {
1614 ctxt->input->line++; ctxt->input->col = 1;
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1624 ctxt->input->cur = cur;
1629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1640 * Need to handle support of entities branching here
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1648 /************************************************************************
1650 * Commodity functions to handle entities *
1652 ************************************************************************/
1656 * @ctxt: an XML parser context
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1661 * Returns the current xmlChar in the parser context
1664 xmlPopInput(xmlParserCtxtPtr ctxt) {
1665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
1670 if ((*ctxt->input->cur == 0) &&
1671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1685 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1696 inputPush(ctxt, input);
1702 * @ctxt: an XML parser context
1704 * parse Reference declarations
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1713 * Returns the value parsed (as an int), 0 in case of error
1716 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1717 unsigned int val = 0;
1719 unsigned int outofrange = 0;
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1724 if ((RAW == '&') && (NXT(1) == '#') &&
1728 while (RAW != ';') { /* loop blocked by count */
1733 if ((RAW >= '0') && (RAW <= '9'))
1734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1756 } else if ((RAW == '&') && (NXT(1) == '#')) {
1759 while (RAW != ';') { /* loop blocked by count */
1764 if ((RAW >= '0') && (RAW <= '9'))
1765 val = val * 10 + (CUR - '0');
1767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1792 if ((IS_CHAR(val) && (outofrange == 0))) {
1795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an input flow.
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1821 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
1827 if ((str == NULL) || (*str == NULL)) return(0);
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1883 if ((IS_CHAR(val) && (outofrange == 0))) {
1886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1901 * Returns the new input stream or NULL
1904 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1906 static xmlParserInputPtr
1907 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1911 if (entity == NULL) {
1912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1923 length = xmlStrlen(entity->name) + 5;
1924 buffer = xmlMallocAtomic(length);
1925 if (buffer == NULL) {
1926 xmlErrMemory(ctxt, NULL);
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1947 * [69] PEReference ::= '%' Name ';'
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done according to
1970 * http://www.w3.org/TR/REC-xml#entproc
1972 * - Included in literal in entity values
1973 * - Included as Parameter Entity reference within DTDs
1976 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1977 const xmlChar *name;
1978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1985 case XML_PARSER_COMMENT:
1987 case XML_PARSER_START_TAG:
1989 case XML_PARSER_END_TAG:
1991 case XML_PARSER_EOF:
1992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
1997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2003 case XML_PARSER_SYSTEM_LITERAL:
2004 case XML_PARSER_PUBLIC_LITERAL:
2005 /* we just ignore it there */
2007 case XML_PARSER_EPILOG:
2008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2010 case XML_PARSER_ENTITY_VALUE:
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2019 case XML_PARSER_DTD:
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2032 case XML_PARSER_IGNORE:
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
2040 "PEReference: %s\n", name);
2042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
2061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2062 "PEReference: %%%s; not found\n", name);
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2088 xmlCharEncoding enc;
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2093 * this is done independently.
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
2102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
2108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
2122 xmlParseTextDecl(ctxt);
2125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2137 * Macro used to grow the current buffer.
2139 #define growBuffer(buffer) { \
2141 buffer##_size *= 2; \
2143 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
2144 if (tmp == NULL) goto mem_error; \
2149 * xmlStringLenDecodeEntities:
2150 * @ctxt: the parser context
2151 * @str: the input string
2152 * @len: the string length
2153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2158 * Takes an entity string content and processes it to do the adequate substitutions.
2160 * [67] Reference ::= EntityRef | CharRef
2162 * [69] PEReference ::= '%' Name ';'
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2168 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2173 xmlChar *current = NULL;
2174 const xmlChar *last;
2179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2183 if (ctxt->depth > 40) {
2184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2189 * allocate a translation buffer.
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2193 if (buffer == NULL) goto mem_error;
2196 * OK loop until we reach one of the ending char or a size limit.
2197 * we are operating on already parsed values.
2200 c = CUR_SCHAR(str, l);
2203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2210 COPY_BUF(0,buffer,nbchars,val);
2212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if ((ent != NULL) &&
2222 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2223 if (ent->content != NULL) {
2224 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2225 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2229 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2230 "predefined entity has no content\n");
2232 } else if ((ent != NULL) && (ent->content != NULL)) {
2236 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2241 while (*current != 0) { /* non input consuming loop */
2242 buffer[nbchars++] = *current++;
2244 buffer_size - XML_PARSER_BUFFER_SIZE) {
2250 } else if (ent != NULL) {
2251 int i = xmlStrlen(ent->name);
2252 const xmlChar *cur = ent->name;
2254 buffer[nbchars++] = '&';
2255 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2259 buffer[nbchars++] = *cur++;
2260 buffer[nbchars++] = ';';
2262 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2263 if (xmlParserDebugEntities)
2264 xmlGenericError(xmlGenericErrorContext,
2265 "String decoding PE Reference: %.30s\n", str);
2266 ent = xmlParseStringPEReference(ctxt, &str);
2271 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2276 while (*current != 0) { /* non input consuming loop */
2277 buffer[nbchars++] = *current++;
2279 buffer_size - XML_PARSER_BUFFER_SIZE) {
2287 COPY_BUF(l,buffer,nbchars,c);
2289 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2294 c = CUR_SCHAR(str, l);
2298 buffer[nbchars++] = 0;
2302 xmlErrMemory(ctxt, NULL);
2307 * xmlStringDecodeEntities:
2308 * @ctxt: the parser context
2309 * @str: the input string
2310 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2311 * @end: an end marker xmlChar, 0 if none
2312 * @end2: an end marker xmlChar, 0 if none
2313 * @end3: an end marker xmlChar, 0 if none
2315 * Takes a entity string content and process to do the adequate substitutions.
2317 * [67] Reference ::= EntityRef | CharRef
2319 * [69] PEReference ::= '%' Name ';'
2321 * Returns A newly allocated string with the substitution done. The caller
2322 * must deallocate it !
2325 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2326 xmlChar end, xmlChar end2, xmlChar end3) {
2327 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2328 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2332 /************************************************************************
2334 * Commodity functions, cleanup needed ? *
2336 ************************************************************************/
2340 * @ctxt: an XML parser context
2342 * @len: the size of @str
2343 * @blank_chars: we know the chars are blanks
2345 * Is this a sequence of blank chars that one can ignore ?
2347 * Returns 1 if ignorable 0 otherwise.
2350 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2353 xmlNodePtr lastChild;
2356 * Don't spend time trying to differentiate them, the same callback is
2359 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2363 * Check for xml:space value.
2365 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2366 (*(ctxt->space) == -2))
2370 * Check that the string is made of blanks
2372 if (blank_chars == 0) {
2373 for (i = 0;i < len;i++)
2374 if (!(IS_BLANK_CH(str[i]))) return(0);
2378 * Look if the element is mixed content in the DTD if available
2380 if (ctxt->node == NULL) return(0);
2381 if (ctxt->myDoc != NULL) {
2382 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2383 if (ret == 0) return(1);
2384 if (ret == 1) return(0);
2388 * Otherwise, heuristic :-\
2390 if ((RAW != '<') && (RAW != 0xD)) return(0);
2391 if ((ctxt->node->children == NULL) &&
2392 (RAW == '<') && (NXT(1) == '/')) return(0);
2394 lastChild = xmlGetLastChild(ctxt->node);
2395 if (lastChild == NULL) {
2396 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2397 (ctxt->node->content != NULL)) return(0);
2398 } else if (xmlNodeIsText(lastChild))
2400 else if ((ctxt->node->children != NULL) &&
2401 (xmlNodeIsText(ctxt->node->children)))
2406 /************************************************************************
2408 * Extra stuff for namespace support *
2409 * Relates to http://www.w3.org/TR/WD-xml-names *
2411 ************************************************************************/
2415 * @ctxt: an XML parser context
2416 * @name: the XML qualified name string to split
2417 * @prefix: a xmlChar **
2419 * parse a UTF-8 encoded XML qualified name string
2421 * [NS 5] QName ::= (Prefix ':')? LocalPart
2423 * [NS 6] Prefix ::= NCName
2425 * [NS 7] LocalPart ::= NCName
2427 * Returns the local part, and prefix is updated
2428 * to get the Prefix if any.
2432 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2433 xmlChar buf[XML_MAX_NAMELEN + 5];
2434 xmlChar *buffer = NULL;
2436 int max = XML_MAX_NAMELEN;
2437 xmlChar *ret = NULL;
2438 const xmlChar *cur = name;
2441 if (prefix == NULL) return(NULL);
2444 if (cur == NULL) return(NULL);
2446 #ifndef XML_XML_NAMESPACE
2447 /* xml: prefix is not really a namespace */
2448 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2449 (cur[2] == 'l') && (cur[3] == ':'))
2450 return(xmlStrdup(name));
2453 /* nasty but well-formed */
2455 return(xmlStrdup(name));
2458 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2464 * Okay someone managed to make a huge name, so he's ready to pay
2465 * for the processing speed.
2469 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2470 if (buffer == NULL) {
2471 xmlErrMemory(ctxt, NULL);
2474 memcpy(buffer, buf, len);
2475 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2476 if (len + 10 > max) {
2480 tmp = (xmlChar *) xmlRealloc(buffer,
2481 max * sizeof(xmlChar));
2484 xmlErrMemory(ctxt, NULL);
2495 if ((c == ':') && (*cur == 0)) {
2499 return(xmlStrdup(name));
2503 ret = xmlStrndup(buf, len);
2507 max = XML_MAX_NAMELEN;
2515 return(xmlStrndup(BAD_CAST "", 0));
2520 * Check that the first character is proper to start
2523 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2524 ((c >= 0x41) && (c <= 0x5A)) ||
2525 (c == '_') || (c == ':'))) {
2527 int first = CUR_SCHAR(cur, l);
2529 if (!IS_LETTER(first) && (first != '_')) {
2530 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2531 "Name %s is not XML Namespace compliant\n",
2537 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2543 * Okay someone managed to make a huge name, so he's ready to pay
2544 * for the processing speed.
2548 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2549 if (buffer == NULL) {
2550 xmlErrMemory(ctxt, NULL);
2553 memcpy(buffer, buf, len);
2554 while (c != 0) { /* tested bigname2.xml */
2555 if (len + 10 > max) {
2559 tmp = (xmlChar *) xmlRealloc(buffer,
2560 max * sizeof(xmlChar));
2562 xmlErrMemory(ctxt, NULL);
2575 ret = xmlStrndup(buf, len);
2584 /************************************************************************
2586 * The parser itself *
2587 * Relates to http://www.w3.org/TR/REC-xml *
2589 ************************************************************************/
2591 static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
2592 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
2593 int *len, int *alloc, int normalize);
2597 * @ctxt: an XML parser context
2599 * parse an XML name.
2601 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2602 * CombiningChar | Extender
2604 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2606 * [6] Names ::= Name (#x20 Name)*
2608 * Returns the Name parsed or NULL
2612 xmlParseName(xmlParserCtxtPtr ctxt) {
2620 * Accelerator for simple ASCII names
2622 in = ctxt->input->cur;
2623 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2624 ((*in >= 0x41) && (*in <= 0x5A)) ||
2625 (*in == '_') || (*in == ':')) {
2627 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2628 ((*in >= 0x41) && (*in <= 0x5A)) ||
2629 ((*in >= 0x30) && (*in <= 0x39)) ||
2630 (*in == '_') || (*in == '-') ||
2631 (*in == ':') || (*in == '.'))
2633 if ((*in > 0) && (*in < 0x80)) {
2634 count = in - ctxt->input->cur;
2635 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2636 ctxt->input->cur = in;
2637 ctxt->nbChars += count;
2638 ctxt->input->col += count;
2640 xmlErrMemory(ctxt, NULL);
2644 return(xmlParseNameComplex(ctxt));
2648 * xmlParseNameAndCompare:
2649 * @ctxt: an XML parser context
2651 * parse an XML name and compares for match
2652 * (specialized for endtag parsing)
2654 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2655 * and the name for mismatch
2658 static const xmlChar *
2659 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2660 register const xmlChar *cmp = other;
2661 register const xmlChar *in;
2666 in = ctxt->input->cur;
2667 while (*in != 0 && *in == *cmp) {
2672 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
2674 ctxt->input->cur = in;
2675 return (const xmlChar*) 1;
2677 /* failure (or end of input buffer), check with full function */
2678 ret = xmlParseName (ctxt);
2679 /* strings coming from the dictionary, direct comparison is possible */
2681 return (const xmlChar*) 1;
2686 static const xmlChar *
2687 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2693 * Handler for more complex cases
2697 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2698 (!IS_LETTER(c) && (c != '_') &&
2703 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2704 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2705 (c == '.') || (c == '-') ||
2706 (c == '_') || (c == ':') ||
2707 (IS_COMBINING(c)) ||
2708 (IS_EXTENDER(c)))) {
2709 if (count++ > 100) {
2717 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2718 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2719 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2723 * xmlParseStringName:
2724 * @ctxt: an XML parser context
2725 * @str: a pointer to the string pointer (IN/OUT)
2727 * parse an XML name.
2729 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2730 * CombiningChar | Extender
2732 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2734 * [6] Names ::= Name (#x20 Name)*
2736 * Returns the Name parsed or NULL. The @str pointer
2737 * is updated to the current location in the string.
2741 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2742 xmlChar buf[XML_MAX_NAMELEN + 5];
2743 const xmlChar *cur = *str;
2747 c = CUR_SCHAR(cur, l);
2748 if (!IS_LETTER(c) && (c != '_') &&
2753 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2754 (c == '.') || (c == '-') ||
2755 (c == '_') || (c == ':') ||
2756 (IS_COMBINING(c)) ||
2758 COPY_BUF(l,buf,len,c);
2760 c = CUR_SCHAR(cur, l);
2761 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2763 * Okay someone managed to make a huge name, so he's ready to pay
2764 * for the processing speed.
2769 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2770 if (buffer == NULL) {
2771 xmlErrMemory(ctxt, NULL);
2774 memcpy(buffer, buf, len);
2775 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2776 /* test bigentname.xml */
2777 (c == '.') || (c == '-') ||
2778 (c == '_') || (c == ':') ||
2779 (IS_COMBINING(c)) ||
2781 if (len + 10 > max) {
2784 tmp = (xmlChar *) xmlRealloc(buffer,
2785 max * sizeof(xmlChar));
2787 xmlErrMemory(ctxt, NULL);
2793 COPY_BUF(l,buffer,len,c);
2795 c = CUR_SCHAR(cur, l);
2803 return(xmlStrndup(buf, len));
2808 * @ctxt: an XML parser context
2810 * parse an XML Nmtoken.
2812 * [7] Nmtoken ::= (NameChar)+
2814 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
2816 * Returns the Nmtoken parsed or NULL
2820 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2821 xmlChar buf[XML_MAX_NAMELEN + 5];
2829 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2830 (c == '.') || (c == '-') ||
2831 (c == '_') || (c == ':') ||
2832 (IS_COMBINING(c)) ||
2834 if (count++ > 100) {
2838 COPY_BUF(l,buf,len,c);
2841 if (len >= XML_MAX_NAMELEN) {
2843 * Okay someone managed to make a huge token, so he's ready to pay
2844 * for the processing speed.
2849 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2850 if (buffer == NULL) {
2851 xmlErrMemory(ctxt, NULL);
2854 memcpy(buffer, buf, len);
2855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2856 (c == '.') || (c == '-') ||
2857 (c == '_') || (c == ':') ||
2858 (IS_COMBINING(c)) ||
2860 if (count++ > 100) {
2864 if (len + 10 > max) {
2868 tmp = (xmlChar *) xmlRealloc(buffer,
2869 max * sizeof(xmlChar));
2871 xmlErrMemory(ctxt, NULL);
2877 COPY_BUF(l,buffer,len,c);
2887 return(xmlStrndup(buf, len));
2891 * xmlParseEntityValue:
2892 * @ctxt: an XML parser context
2893 * @orig: if non-NULL store a copy of the original entity value
2895 * parse a value for ENTITY declarations
2897 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2898 * "'" ([^%&'] | PEReference | Reference)* "'"
2900 * Returns the EntityValue parsed with reference substituted or NULL
2904 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2905 xmlChar *buf = NULL;
2907 int size = XML_PARSER_BUFFER_SIZE;
2910 xmlChar *ret = NULL;
2911 const xmlChar *cur = NULL;
2912 xmlParserInputPtr input;
2914 if (RAW == '"') stop = '"';
2915 else if (RAW == '\'') stop = '\'';
2917 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
2920 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
2922 xmlErrMemory(ctxt, NULL);
2927 * The content of the entity definition is copied in a buffer.
2930 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2931 input = ctxt->input;
2936 * NOTE: 4.4.5 Included in Literal
2937 * When a parameter entity reference appears in a literal entity
2938 * value, ... a single or double quote character in the replacement
2939 * text is always treated as a normal data character and will not
2940 * terminate the literal.
2941 * In practice it means we stop the loop only when back at parsing
2942 * the initial entity and the quote is found
2944 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2945 (ctxt->input != input))) {
2946 if (len + 5 >= size) {
2950 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2952 xmlErrMemory(ctxt, NULL);
2958 COPY_BUF(l,buf,len,c);
2961 * Pop-up of finished entities.
2963 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2976 * Raise problem w.r.t. '&' and '%' being used in non-entities
2977 * reference constructs. Note Charref will be handled in
2978 * xmlStringDecodeEntities()
2981 while (*cur != 0) { /* non input consuming */
2982 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2987 name = xmlParseStringName(ctxt, &cur);
2988 if ((name == NULL) || (*cur != ';')) {
2989 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
2990 "EntityValue: '%c' forbidden except for entities references\n",
2993 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2994 (ctxt->inputNr == 1)) {
2995 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3006 * Then PEReference entities are substituted.
3009 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3014 * NOTE: 4.4.7 Bypassed
3015 * When a general entity reference appears in the EntityValue in
3016 * an entity declaration, it is bypassed and left as is.
3017 * so XML_SUBSTITUTE_REF is not set here.
3019 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3031 * xmlParseAttValueComplex:
3032 * @ctxt: an XML parser context
3033 * @attlen: if non-NULL, receives the resulting attribute length
3034 * @normalize: whether to apply the inner normalization
3036 * parse a value for an attribute, this is the fallback function
3037 * of xmlParseAttValue() when the attribute parsing requires handling
3038 * of non-ASCII characters, or normalization compaction.
3040 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3043 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3045 xmlChar *buf = NULL;
3048 int c, l, in_space = 0;
3049 xmlChar *current = NULL;
3052 if (NXT(0) == '"') {
3053 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3056 } else if (NXT(0) == '\'') {
3058 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3061 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3066 * allocate a translation buffer.
3068 buf_size = XML_PARSER_BUFFER_SIZE;
3069 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3070 if (buf == NULL) goto mem_error;
3073 * OK loop until we reach one of the ending char or a size limit.
3076 while ((NXT(0) != limit) && /* checked */
3081 if (NXT(1) == '#') {
3082 int val = xmlParseCharRef(ctxt);
3085 if (ctxt->replaceEntities) {
3086 if (len > buf_size - 10) {
3092 * The reparsing will be done in xmlStringGetNodeList()
3093 * called by the attribute() function in SAX.c
3095 if (len > buf_size - 10) {
3105 if (len > buf_size - 10) {
3108 len += xmlCopyChar(0, &buf[len], val);
3111 ent = xmlParseEntityRef(ctxt);
3112 if ((ent != NULL) &&
3113 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3114 if (len > buf_size - 10) {
3117 if ((ctxt->replaceEntities == 0) &&
3118 (ent->content[0] == '&')) {
3125 buf[len++] = ent->content[0];
3127 } else if ((ent != NULL) &&
3128 (ctxt->replaceEntities != 0)) {
3131 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3132 rep = xmlStringDecodeEntities(ctxt, ent->content,
3137 while (*current != 0) { /* non input consuming */
3138 buf[len++] = *current++;
3139 if (len > buf_size - 10) {
3146 if (len > buf_size - 10) {
3149 if (ent->content != NULL)
3150 buf[len++] = ent->content[0];
3152 } else if (ent != NULL) {
3153 int i = xmlStrlen(ent->name);
3154 const xmlChar *cur = ent->name;
3157 * This may look absurd but is needed to detect
3160 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3161 (ent->content != NULL)) {
3163 rep = xmlStringDecodeEntities(ctxt, ent->content,
3164 XML_SUBSTITUTE_REF, 0, 0, 0);
3170 * Just output the reference
3173 if (len > buf_size - i - 10) {
3177 buf[len++] = *cur++;
3182 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3183 if ((len != 0) || (!normalize)) {
3184 if ((!normalize) || (!in_space)) {
3185 COPY_BUF(l,buf,len,0x20);
3186 if (len > buf_size - 10) {
3194 COPY_BUF(l,buf,len,c);
3195 if (len > buf_size - 10) {
3204 if ((in_space) && (normalize)) {
3205 while (buf[len - 1] == 0x20) len--;
3209 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3210 } else if (RAW != limit) {
3211 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3212 "AttValue: ' expected\n");
3215 if (attlen != NULL) *attlen = len;
3219 xmlErrMemory(ctxt, NULL);
3225 * @ctxt: an XML parser context
3227 * parse a value for an attribute
3228 * Note: the parser won't do substitution of entities here, this
3229 * will be handled later in xmlStringGetNodeList
3231 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3232 * "'" ([^<&'] | Reference)* "'"
3234 * 3.3.3 Attribute-Value Normalization:
3235 * Before the value of an attribute is passed to the application or
3236 * checked for validity, the XML processor must normalize it as follows:
3237 * - a character reference is processed by appending the referenced
3238 * character to the attribute value
3239 * - an entity reference is processed by recursively processing the
3240 * replacement text of the entity
3241 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3242 * appending #x20 to the normalized value, except that only a single
3243 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3244 * parsed entity or the literal entity value of an internal parsed entity
3245 * - other characters are processed by appending them to the normalized value
3246 * If the declared value is not CDATA, then the XML processor must further
3247 * process the normalized attribute value by discarding any leading and
3248 * trailing space (#x20) characters, and by replacing sequences of space
3249 * (#x20) characters by a single space (#x20) character.
3250 * All attributes for which no declaration has been read should be treated
3251 * by a non-validating parser as if declared CDATA.
3253 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3258 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3259 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3260 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3264 * xmlParseSystemLiteral:
3265 * @ctxt: an XML parser context
3267 * parse an XML Literal
3269 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3271 * Returns the SystemLiteral parsed or NULL
3275 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3276 xmlChar *buf = NULL;
3278 int size = XML_PARSER_BUFFER_SIZE;
3281 int state = ctxt->instate;
3288 } else if (RAW == '\'') {
3292 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3296 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3298 xmlErrMemory(ctxt, NULL);
3301 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3303 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3304 if (len + 5 >= size) {
3308 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3311 xmlErrMemory(ctxt, NULL);
3312 ctxt->instate = (xmlParserInputState) state;
3322 COPY_BUF(l,buf,len,cur);
3332 ctxt->instate = (xmlParserInputState) state;
3333 if (!IS_CHAR(cur)) {
3334 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3342 * xmlParsePubidLiteral:
3343 * @ctxt: an XML parser context
3345 * parse an XML public literal
3347 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3349 * Returns the PubidLiteral parsed or NULL.
3353 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3354 xmlChar *buf = NULL;
3356 int size = XML_PARSER_BUFFER_SIZE;
3360 xmlParserInputState oldstate = ctxt->instate;
3366 } else if (RAW == '\'') {
3370 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3373 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3375 xmlErrMemory(ctxt, NULL);
3378 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3380 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3381 if (len + 1 >= size) {
3385 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3387 xmlErrMemory(ctxt, NULL);
3409 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3413 ctxt->instate = oldstate;
3417 void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
3420 * used for the test in the inner loop of the char data testing
3422 static const unsigned char test_char_data[256] = {
3423 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3424 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
3425 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3426 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3427 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
3428 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
3429 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
3430 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
3431 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
3432 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
3433 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
3434 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
3435 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
3436 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
3437 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
3438 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
3439 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
3440 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3441 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3442 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3443 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3444 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3445 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3446 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3447 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3448 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3449 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3450 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3451 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3452 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3453 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3454 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
3459 * @ctxt: an XML parser context
3460 * @cdata: int indicating whether we are within a CDATA section
3462 * parse a CharData section.
3463 * if we are within a CDATA section ']]>' marks an end of section.
3465 * The right angle bracket (>) may be represented using the string "&gt;",
3466 * and must, for compatibility, be escaped using "&gt;" or a character
3467 * reference when it appears in the string "]]>" in content, when that
3468 * string is not marking the end of a CDATA section.
3470 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3474 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
3477 int line = ctxt->input->line;
3478 int col = ctxt->input->col;
3484 * Accelerated common case where input don't need to be
3485 * modified before passing it to the handler.
3488 in = ctxt->input->cur;
3491 while (*in == 0x20) in++;
3494 ctxt->input->line++; ctxt->input->col = 1;
3496 } while (*in == 0xA);
3497 goto get_more_space;
3500 nbchar = in - ctxt->input->cur;
3502 const xmlChar *tmp = ctxt->input->cur;
3503 ctxt->input->cur = in;
3505 if ((ctxt->sax != NULL) &&
3506 (ctxt->sax->ignorableWhitespace !=
3507 ctxt->sax->characters)) {
3508 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3509 if (ctxt->sax->ignorableWhitespace != NULL)
3510 ctxt->sax->ignorableWhitespace(ctxt->userData,
3513 if (ctxt->sax->characters != NULL)
3514 ctxt->sax->characters(ctxt->userData,
3516 if (*ctxt->space == -1)
3519 } else if ((ctxt->sax != NULL) &&
3520 (ctxt->sax->characters != NULL)) {
3521 ctxt->sax->characters(ctxt->userData,
3529 ccol = ctxt->input->col;
3530 while (test_char_data[*in]) {
3534 ctxt->input->col = ccol;
3537 ctxt->input->line++; ctxt->input->col = 1;
3539 } while (*in == 0xA);
3543 if ((in[1] == ']') && (in[2] == '>')) {
3544 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3545 ctxt->input->cur = in;
3552 nbchar = in - ctxt->input->cur;
3554 if ((ctxt->sax != NULL) &&
3555 (ctxt->sax->ignorableWhitespace !=
3556 ctxt->sax->characters) &&
3557 (IS_BLANK_CH(*ctxt->input->cur))) {
3558 const xmlChar *tmp = ctxt->input->cur;
3559 ctxt->input->cur = in;
3561 if (areBlanks(ctxt, tmp, nbchar, 0)) {
3562 if (ctxt->sax->ignorableWhitespace != NULL)
3563 ctxt->sax->ignorableWhitespace(ctxt->userData,
3566 if (ctxt->sax->characters != NULL)
3567 ctxt->sax->characters(ctxt->userData,
3569 if (*ctxt->space == -1)
3572 line = ctxt->input->line;
3573 col = ctxt->input->col;
3574 } else if (ctxt->sax != NULL) {
3575 if (ctxt->sax->characters != NULL)
3576 ctxt->sax->characters(ctxt->userData,
3577 ctxt->input->cur, nbchar);
3578 line = ctxt->input->line;
3579 col = ctxt->input->col;
3582 ctxt->input->cur = in;
3586 ctxt->input->cur = in;
3588 ctxt->input->line++; ctxt->input->col = 1;
3589 continue; /* while */
3601 in = ctxt->input->cur;
3602 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3605 ctxt->input->line = line;
3606 ctxt->input->col = col;
3607 xmlParseCharDataComplex(ctxt, cdata);
3611 * xmlParseCharDataComplex:
3612 * @ctxt: an XML parser context
3613 * @cdata: int indicating whether we are within a CDATA section
3615 * parse a CharData section. This is the fallback function
3616 * of xmlParseCharData() when the parsing requires handling
3617 * of non-ASCII characters.
3620 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
3621 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3629 while ((cur != '<') && /* checked */
3631 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
3632 if ((cur == ']') && (NXT(1) == ']') &&
3636 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3639 COPY_BUF(l,buf,nbchar,cur);
3640 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
3644 * OK the segment is to be consumed as chars.
3646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3647 if (areBlanks(ctxt, buf, nbchar, 0)) {
3648 if (ctxt->sax->ignorableWhitespace != NULL)
3649 ctxt->sax->ignorableWhitespace(ctxt->userData,
3652 if (ctxt->sax->characters != NULL)
3653 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3654 if ((ctxt->sax->characters !=
3655 ctxt->sax->ignorableWhitespace) &&
3656 (*ctxt->space == -1))
3673 * OK the segment is to be consumed as chars.
3675 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3676 if (areBlanks(ctxt, buf, nbchar, 0)) {
3677 if (ctxt->sax->ignorableWhitespace != NULL)
3678 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3680 if (ctxt->sax->characters != NULL)
3681 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3682 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3683 (*ctxt->space == -1))
3688 if ((cur != 0) && (!IS_CHAR(cur))) {
3689 /* Generate the error and skip the offending character */
3690 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3691 "PCDATA invalid Char value %d\n",
3698 * xmlParseExternalID:
3699 * @ctxt: an XML parser context
3700 * @publicID: a xmlChar** receiving PubidLiteral
3701 * @strict: indicate whether we should restrict parsing to only
3702 * production [75], see NOTE below
3704 * Parse an External ID or a Public ID
3706 * NOTE: Productions [75] and [83] interact badly since [75] can generate
3707 * 'PUBLIC' S PubidLiteral S SystemLiteral
3709 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3710 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3712 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3714 * Returns the SystemLiteral; in the second
3715 * case publicID receives the PubidLiteral. If strict is off
3716 * it is possible to return NULL and have publicID set.
3720 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3721 xmlChar *URI = NULL;
3726 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
3728 if (!IS_BLANK_CH(CUR)) {
3729 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3730 "Space required after 'SYSTEM'\n");
3733 URI = xmlParseSystemLiteral(ctxt);
3735 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3737 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
3739 if (!IS_BLANK_CH(CUR)) {
3740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3741 "Space required after 'PUBLIC'\n");
3744 *publicID = xmlParsePubidLiteral(ctxt);
3745 if (*publicID == NULL) {
3746 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
3750 * We don't handle [83] so "S SystemLiteral" is required.
3752 if (!IS_BLANK_CH(CUR)) {
3753 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3754 "Space required after the Public Identifier\n");
3758 * We handle [83] so we return immediately, if
3759 * "S SystemLiteral" is not detected. From a purely parsing
3760 * point of view that's a nice mess.
3766 if (!IS_BLANK_CH(*ptr)) return(NULL);
3768 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3769 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3772 URI = xmlParseSystemLiteral(ctxt);
3774 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3781 * xmlParseCommentComplex:
3782 * @ctxt: an XML parser context
3783 * @buf: the already parsed part of the buffer
3784 * @len: number of bytes filled in the buffer
3785 * @size: allocated size of the buffer
3787 * Skip an XML (SGML) comment <!-- .... -->
3788 * The spec says that "For compatibility, the string "--" (double-hyphen)
3789 * must not occur within comments. "
3790 * This is the slow routine in case the accelerator for ascii didn't work
3792 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3795 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
3799 xmlParserInputPtr input = ctxt->input;
3804 size = XML_PARSER_BUFFER_SIZE;
3805 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3807 xmlErrMemory(ctxt, NULL);
3813 goto not_terminated;
3817 goto not_terminated;
3821 goto not_terminated;
3822 while (IS_CHAR(cur) && /* checked */
3824 (r != '-') || (q != '-'))) {
3825 if ((r == '-') && (q == '-')) {
3826 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
3828 if (len + 5 >= size) {
3831 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3832 if (new_buf == NULL) {
3834 xmlErrMemory(ctxt, NULL);
3839 COPY_BUF(ql,buf,len,q);
3859 if (!IS_CHAR(cur)) {
3860 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3861 "Comment not terminated \n<!--%.50s\n", buf);
3864 if (input != ctxt->input) {
3865 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3866 "Comment doesn't start and stop in the same entity\n");
3869 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3870 (!ctxt->disableSAX))
3871 ctxt->sax->comment(ctxt->userData, buf);
3876 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3877 "Comment not terminated\n", NULL);
3882 * @ctxt: an XML parser context
3884 * Skip an XML (SGML) comment <!-- .... -->
3885 * The spec says that "For compatibility, the string "--" (double-hyphen)
3886 * must not occur within comments. "
3888 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3891 xmlParseComment(xmlParserCtxtPtr ctxt) {
3892 xmlChar *buf = NULL;
3893 int size = XML_PARSER_BUFFER_SIZE;
3895 xmlParserInputState state;
3897 int nbchar = 0, ccol;
3900 * Check that there is a comment right here.
3902 if ((RAW != '<') || (NXT(1) != '!') ||
3903 (NXT(2) != '-') || (NXT(3) != '-')) return;
3905 state = ctxt->instate;
3906 ctxt->instate = XML_PARSER_COMMENT;
3912 * Accelerated common case where input don't need to be
3913 * modified before passing it to the handler.
3915 in = ctxt->input->cur;
3919 ctxt->input->line++; ctxt->input->col = 1;
3921 } while (*in == 0xA);
3924 ccol = ctxt->input->col;
3925 while (((*in > '-') && (*in <= 0x7F)) ||
3926 ((*in >= 0x20) && (*in < '-')) ||
3931 ctxt->input->col = ccol;
3934 ctxt->input->line++; ctxt->input->col = 1;
3936 } while (*in == 0xA);
3939 nbchar = in - ctxt->input->cur;
3941 * save current set of data
3944 if ((ctxt->sax != NULL) &&
3945 (ctxt->sax->comment != NULL)) {
3947 if ((*in == '-') && (in[1] == '-'))
3950 size = XML_PARSER_BUFFER_SIZE + nbchar;
3951 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3953 xmlErrMemory(ctxt, NULL);
3954 ctxt->instate = state;
3958 } else if (len + nbchar + 1 >= size) {
3960 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3961 new_buf = (xmlChar *) xmlRealloc(buf,
3962 size * sizeof(xmlChar));
3963 if (new_buf == NULL) {
3965 xmlErrMemory(ctxt, NULL);
3966 ctxt->instate = state;
3971 memcpy(&buf[len], ctxt->input->cur, nbchar);
3976 ctxt->input->cur = in;
3979 ctxt->input->line++; ctxt->input->col = 1;
3984 ctxt->input->cur = in;
3986 ctxt->input->line++; ctxt->input->col = 1;
3987 continue; /* while */
3993 in = ctxt->input->cur;
3998 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3999 (!ctxt->disableSAX)) {
4001 ctxt->sax->comment(ctxt->userData, buf);
4003 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4007 ctxt->instate = state;
4011 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4012 "Comment not terminated \n<!--%.50s\n",
4015 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4016 "Comment not terminated \n", NULL);
4024 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4025 xmlParseCommentComplex(ctxt, buf, len, size);
4026 ctxt->instate = state;
4033 * @ctxt: an XML parser context
4035 * parse the name of a PI
4037 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4039 * Returns the PITarget name or NULL
4043 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4044 const xmlChar *name;
4046 name = xmlParseName(ctxt);
4047 if ((name != NULL) &&
4048 ((name[0] == 'x') || (name[0] == 'X')) &&
4049 ((name[1] == 'm') || (name[1] == 'M')) &&
4050 ((name[2] == 'l') || (name[2] == 'L'))) {
4052 if ((name[0] == 'x') && (name[1] == 'm') &&
4053 (name[2] == 'l') && (name[3] == 0)) {
4054 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4055 "XML declaration allowed only at the start of the document\n");
4057 } else if (name[3] == 0) {
4058 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4062 if (xmlW3CPIs[i] == NULL) break;
4063 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4066 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4067 "xmlParsePITarget: invalid name prefix 'xml'\n",
4073 #ifdef LIBXML_CATALOG_ENABLED
4075 * xmlParseCatalogPI:
4076 * @ctxt: an XML parser context
4077 * @catalog: the PI value string
4079 * parse an XML Catalog Processing Instruction.
4081 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4083 * Occurs only if allowed by the user and if happening in the Misc
4084 * part of the document before any doctype information
4085 * This will add the given catalog to the parsing context in order
4086 * to be used if there is a resolution need further down in the document
4090 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4091 xmlChar *URL = NULL;
4092 const xmlChar *tmp, *base;
4096 while (IS_BLANK_CH(*tmp)) tmp++;
4097 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4100 while (IS_BLANK_CH(*tmp)) tmp++;
4105 while (IS_BLANK_CH(*tmp)) tmp++;
4107 if ((marker != '\'') && (marker != '"'))
4111 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4114 URL = xmlStrndup(base, tmp - base);
4116 while (IS_BLANK_CH(*tmp)) tmp++;
4121 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4127 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4128 "Catalog PI syntax error: %s\n",
4137 * @ctxt: an XML parser context
4139 * parse an XML Processing Instruction.
4141 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4143 * The processing is transferred to SAX once parsed.
4147 xmlParsePI(xmlParserCtxtPtr ctxt) {
4148 xmlChar *buf = NULL;
4150 int size = XML_PARSER_BUFFER_SIZE;
4152 const xmlChar *target;
4153 xmlParserInputState state;
4156 if ((RAW == '<') && (NXT(1) == '?')) {
4157 xmlParserInputPtr input = ctxt->input;
4158 state = ctxt->instate;
4159 ctxt->instate = XML_PARSER_PI;
4161 * this is a Processing Instruction.
4167 * Parse the target name and check for special support like
4170 target = xmlParsePITarget(ctxt);
4171 if (target != NULL) {
4172 if ((RAW == '?') && (NXT(1) == '>')) {
4173 if (input != ctxt->input) {
4174 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4175 "PI declaration doesn't start and stop in the same entity\n");
4182 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4183 (ctxt->sax->processingInstruction != NULL))
4184 ctxt->sax->processingInstruction(ctxt->userData,
4186 ctxt->instate = state;
4189 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4191 xmlErrMemory(ctxt, NULL);
4192 ctxt->instate = state;
4196 if (!IS_BLANK(cur)) {
4197 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4198 "ParsePI: PI %s space expected\n", target);
4202 while (IS_CHAR(cur) && /* checked */
4203 ((cur != '?') || (NXT(1) != '>'))) {
4204 if (len + 5 >= size) {
4208 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4210 xmlErrMemory(ctxt, NULL);
4212 ctxt->instate = state;
4222 COPY_BUF(l,buf,len,cur);
4233 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4234 "ParsePI: PI %s never end ...\n", target);
4236 if (input != ctxt->input) {
4237 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4238 "PI declaration doesn't start and stop in the same entity\n");
4242 #ifdef LIBXML_CATALOG_ENABLED
4243 if (((state == XML_PARSER_MISC) ||
4244 (state == XML_PARSER_START)) &&
4245 (xmlStrEqual(target, XML_CATALOG_PI))) {
4246 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4247 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4248 (allow == XML_CATA_ALLOW_ALL))
4249 xmlParseCatalogPI(ctxt, buf);
4257 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4258 (ctxt->sax->processingInstruction != NULL))
4259 ctxt->sax->processingInstruction(ctxt->userData,
4264 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4266 ctxt->instate = state;
4271 * xmlParseNotationDecl:
4272 * @ctxt: an XML parser context
4274 * parse a notation declaration
4276 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4278 * Hence there are actually 3 choices:
4279 * 'PUBLIC' S PubidLiteral
4280 * 'PUBLIC' S PubidLiteral S SystemLiteral
4281 * and 'SYSTEM' S SystemLiteral
4283 * See the NOTE on xmlParseExternalID().
4287 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4288 const xmlChar *name;
4292 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4293 xmlParserInputPtr input = ctxt->input;
4296 if (!IS_BLANK_CH(CUR)) {
4297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4298 "Space required after '<!NOTATION'\n");
4303 name = xmlParseName(ctxt);
4305 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4308 if (!IS_BLANK_CH(CUR)) {
4309 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4310 "Space required after the NOTATION name'\n");
4318 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4322 if (input != ctxt->input) {
4323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4324 "Notation declaration doesn't start and stop in the same entity\n");
4327 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4328 (ctxt->sax->notationDecl != NULL))
4329 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4331 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4333 if (Systemid != NULL) xmlFree(Systemid);
4334 if (Pubid != NULL) xmlFree(Pubid);
4339 * xmlParseEntityDecl:
4340 * @ctxt: an XML parser context
4342 * parse <!ENTITY declarations
4344 * [70] EntityDecl ::= GEDecl | PEDecl
4346 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4348 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4350 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4352 * [74] PEDef ::= EntityValue | ExternalID
4354 * [76] NDataDecl ::= S 'NDATA' S Name
4356 * [ VC: Notation Declared ]
4357 * The Name must match the declared name of a notation.
4361 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
4362 const xmlChar *name = NULL;
4363 xmlChar *value = NULL;
4364 xmlChar *URI = NULL, *literal = NULL;
4365 const xmlChar *ndata = NULL;
4366 int isParameter = 0;
4367 xmlChar *orig = NULL;
4370 /* GROW; done in the caller */
4371 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
4372 xmlParserInputPtr input = ctxt->input;
4375 skipped = SKIP_BLANKS;
4377 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4378 "Space required after '<!ENTITY'\n");
4383 skipped = SKIP_BLANKS;
4385 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4386 "Space required after '%'\n");
4391 name = xmlParseName(ctxt);
4393 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4394 "xmlParseEntityDecl: no name\n");
4397 skipped = SKIP_BLANKS;
4399 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4400 "Space required after the entity name\n");
4403 ctxt->instate = XML_PARSER_ENTITY_DECL;
4405 * handle the various case of definitions...
4408 if ((RAW == '"') || (RAW == '\'')) {
4409 value = xmlParseEntityValue(ctxt, &orig);
4411 if ((ctxt->sax != NULL) &&
4412 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4413 ctxt->sax->entityDecl(ctxt->userData, name,
4414 XML_INTERNAL_PARAMETER_ENTITY,
4418 URI = xmlParseExternalID(ctxt, &literal, 1);
4419 if ((URI == NULL) && (literal == NULL)) {
4420 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4425 uri = xmlParseURI((const char *) URI);
4427 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4428 "Invalid URI: %s\n", URI);
4430 * This really ought to be a well formedness error
4431 * but the XML Core WG decided otherwise c.f. issue
4432 * E26 of the XML erratas.
4435 if (uri->fragment != NULL) {
4437 * Okay this is foolish to block those but not
4440 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4442 if ((ctxt->sax != NULL) &&
4443 (!ctxt->disableSAX) &&
4444 (ctxt->sax->entityDecl != NULL))
4445 ctxt->sax->entityDecl(ctxt->userData, name,
4446 XML_EXTERNAL_PARAMETER_ENTITY,
4447 literal, URI, NULL);
4454 if ((RAW == '"') || (RAW == '\'')) {
4455 value = xmlParseEntityValue(ctxt, &orig);
4456 if ((ctxt->sax != NULL) &&
4457 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4458 ctxt->sax->entityDecl(ctxt->userData, name,
4459 XML_INTERNAL_GENERAL_ENTITY,
4462 * For expat compatibility in SAX mode.
4464 if ((ctxt->myDoc == NULL) ||
4465 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4466 if (ctxt->myDoc == NULL) {
4467 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4469 if (ctxt->myDoc->intSubset == NULL)
4470 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4471 BAD_CAST "fake", NULL, NULL);
4473 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4477 URI = xmlParseExternalID(ctxt, &literal, 1);
4478 if ((URI == NULL) && (literal == NULL)) {
4479 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4484 uri = xmlParseURI((const char *)URI);
4486 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4487 "Invalid URI: %s\n", URI);
4489 * This really ought to be a well formedness error
4490 * but the XML Core WG decided otherwise c.f. issue
4491 * E26 of the XML erratas.
4494 if (uri->fragment != NULL) {
4496 * Okay this is foolish to block those but not
4499 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4504 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
4505 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4506 "Space required before 'NDATA'\n");
4509 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
4511 if (!IS_BLANK_CH(CUR)) {
4512 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4513 "Space required after 'NDATA'\n");
4516 ndata = xmlParseName(ctxt);
4517 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4518 (ctxt->sax->unparsedEntityDecl != NULL))
4519 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4520 literal, URI, ndata);
4522 if ((ctxt->sax != NULL) &&
4523 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4524 ctxt->sax->entityDecl(ctxt->userData, name,
4525 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4526 literal, URI, NULL);
4528 * For expat compatibility in SAX mode.
4529 * assuming the entity repalcement was asked for
4531 if ((ctxt->replaceEntities != 0) &&
4532 ((ctxt->myDoc == NULL) ||
4533 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4534 if (ctxt->myDoc == NULL) {
4535 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4538 if (ctxt->myDoc->intSubset == NULL)
4539 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4540 BAD_CAST "fake", NULL, NULL);
4541 xmlSAX2EntityDecl(ctxt, name,
4542 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4543 literal, URI, NULL);
4550 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
4551 "xmlParseEntityDecl: entity %s not terminated\n", name);
4553 if (input != ctxt->input) {
4554 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4555 "Entity declaration doesn't start and stop in the same entity\n");
4561 * Ugly mechanism to save the raw entity value.
4563 xmlEntityPtr cur = NULL;
4566 if ((ctxt->sax != NULL) &&
4567 (ctxt->sax->getParameterEntity != NULL))
4568 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4570 if ((ctxt->sax != NULL) &&
4571 (ctxt->sax->getEntity != NULL))
4572 cur = ctxt->sax->getEntity(ctxt->userData, name);
4573 if ((cur == NULL) && (ctxt->userData==ctxt)) {
4574 cur = xmlSAX2GetEntity(ctxt, name);
4578 if (cur->orig != NULL)
4585 if (value != NULL) xmlFree(value);
4586 if (URI != NULL) xmlFree(URI);
4587 if (literal != NULL) xmlFree(literal);
4592 * xmlParseDefaultDecl:
4593 * @ctxt: an XML parser context
4594 * @value: Receive a possible fixed default value for the attribute
4596 * Parse an attribute default declaration
4598 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4600 * [ VC: Required Attribute ]
4601 * if the default declaration is the keyword #REQUIRED, then the
4602 * attribute must be specified for all elements of the type in the
4603 * attribute-list declaration.
4605 * [ VC: Attribute Default Legal ]
4606 * The declared default value must meet the lexical constraints of
4607 * the declared attribute type c.f. xmlValidateAttributeDecl()
4609 * [ VC: Fixed Attribute Default ]
4610 * if an attribute has a default value declared with the #FIXED
4611 * keyword, instances of that attribute must match the default value.
4613 * [ WFC: No < in Attribute Values ]
4614 * handled in xmlParseAttValue()
4616 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4617 * or XML_ATTRIBUTE_FIXED.
/*
 * Recognises the '#REQUIRED', '#IMPLIED' and '#FIXED' keywords at the
 * current position; when neither of the first two is present an attribute
 * value is read with xmlParseAttValue().
 */
4621 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
/* The two keyword-only forms are answered directly with their kind. */
4626 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
4628 return(XML_ATTRIBUTE_REQUIRED);
4630 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
4632 return(XML_ATTRIBUTE_IMPLIED);
/* Default kind unless the '#FIXED' keyword is found just below. */
4634 val = XML_ATTRIBUTE_NONE;
4635 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
4637 val = XML_ATTRIBUTE_FIXED;
4638 if (!IS_BLANK_CH(CUR)) {
4639 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4640 "Space required after '#FIXED'\n");
4644 ret = xmlParseAttValue(ctxt);
/* The parser state is set to XML_PARSER_DTD once the value has been read. */
4645 ctxt->instate = XML_PARSER_DTD;
/* The error below reuses the error number already recorded in ctxt->errNo. */
4647 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
4648 "Attribute default value declaration error\n");
4655 * xmlParseNotationType:
4656 * @ctxt: an XML parser context
4658 * parse a Notation attribute type.
4660 * Note: the leading 'NOTATION' S part has already been parsed...
4662 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4664 * [ VC: Notation Attributes ]
4665 * Values of this type must match one of the notation names included
4666 * in the declaration; all notation names in the declaration must be declared.
4668 * Returns: the notation attribute tree built while parsing
/*
 * Collects the '|'-separated names of a NOTATION attribute type into an
 * enumeration list: `ret` is the first node created, `last` the most
 * recent one, `cur` the node being created for the current name.
 */
4672 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
4673 const xmlChar *name;
4674 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4677 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4684 name = xmlParseName(ctxt);
4686 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4687 "Name expected in NOTATION declaration\n");
4690 cur = xmlCreateEnumeration(name);
/* If the node could not be created, hand back whatever was collected so far. */
4691 if (cur == NULL) return(ret);
/* The first node created becomes both the head and the tail of the list. */
4692 if (last == NULL) ret = last = cur;
4698 } while (RAW == '|');
4700 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
/*
 * NOTE(review): `last` is freed here only when it differs from the head
 * `ret`. If `last` is already linked behind `ret`, the list rooted at
 * `ret` would keep a dangling pointer to it -- confirm how `ret` is
 * released or returned on this error path.
 */
4701 if ((last != NULL) && (last != ret))
4702 xmlFreeEnumeration(last);
4710 * xmlParseEnumerationType:
4711 * @ctxt: an XML parser context
4713 * parse an Enumeration attribute type.
4715 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4717 * [ VC: Enumeration ]
4718 * Values of this type must match one of the Nmtoken tokens in
4721 * Returns: the enumeration attribute tree built while parsing
/*
 * Collects the '|'-separated Nmtokens of an enumeration into a list:
 * `ret` is the first node created, `last` the most recent one.
 */
4725 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4727 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4730 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
4737 name = xmlParseNmtoken(ctxt);
4739 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
4742 cur = xmlCreateEnumeration(name);
/* If the node could not be created, hand back whatever was collected so far. */
4744 if (cur == NULL) return(ret);
/* The first node created becomes both the head and the tail of the list. */
4745 if (last == NULL) ret = last = cur;
4751 } while (RAW == '|');
4753 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
4761 * xmlParseEnumeratedType:
4762 * @ctxt: an XML parser context
4763 * @tree: the enumeration tree built while parsing
4765 * parse an Enumerated attribute type.
4767 * [57] EnumeratedType ::= NotationType | Enumeration
4769 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4772 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
/*
 * Chooses between the two enumerated forms: when the 'NOTATION' keyword is
 * present the value list is read by xmlParseNotationType(), otherwise by
 * xmlParseEnumerationType(). The list is stored through @tree, and 0 is
 * returned instead of the type when the sub-parser produced no list.
 */
4776 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4777 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4779 if (!IS_BLANK_CH(CUR)) {
4780 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4781 "Space required after 'NOTATION'\n");
4785 *tree = xmlParseNotationType(ctxt);
4786 if (*tree == NULL) return(0);
4787 return(XML_ATTRIBUTE_NOTATION);
/* No 'NOTATION' keyword: the value list is a plain enumeration. */
4789 *tree = xmlParseEnumerationType(ctxt);
4790 if (*tree == NULL) return(0);
4791 return(XML_ATTRIBUTE_ENUMERATION);
4795 * xmlParseAttributeType:
4796 * @ctxt: an XML parser context
4797 * @tree: the enumeration tree built while parsing
4799 * parse the type of an attribute definition
4801 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4803 * [55] StringType ::= 'CDATA'
4805 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4806 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4808 * Validity constraints for attribute values syntax are checked in
4809 * xmlValidateAttributeValue()
4812 * Values of type ID must match the Name production. A name must not
4813 * appear more than once in an XML document as a value of this type;
4814 * i.e., ID values must uniquely identify the elements which bear them.
4816 * [ VC: One ID per Element Type ]
4817 * No element type may have more than one ID attribute specified.
4819 * [ VC: ID Attribute Default ]
4820 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4823 * Values of type IDREF must match the Name production, and values
4824 * of type IDREFS must match Names; each IDREF Name must match the value
4825 * of an ID attribute on some element in the XML document; i.e. IDREF
4826 * values must match the value of some ID attribute.
4828 * [ VC: Entity Name ]
4829 * Values of type ENTITY must match the Name production, values
4830 * of type ENTITIES must match Names; each Entity Name must match the
4831 * name of an unparsed entity declared in the DTD.
4833 * [ VC: Name Token ]
4834 * Values of type NMTOKEN must match the Nmtoken production; values
4835 * of type NMTOKENS must match Nmtokens.
4837 * Returns the attribute type
/*
 * Matches the attribute type keyword at the current position. Longer
 * keywords are tested before the keywords they start with (IDREFS before
 * IDREF before ID, NMTOKENS before NMTOKEN) so that the shorter test does
 * not match first.
 */
4840 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4842 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
4844 return(XML_ATTRIBUTE_CDATA);
4845 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
4847 return(XML_ATTRIBUTE_IDREFS);
4848 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
4850 return(XML_ATTRIBUTE_IDREF);
4851 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4853 return(XML_ATTRIBUTE_ID);
4854 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
4856 return(XML_ATTRIBUTE_ENTITY);
4857 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
4859 return(XML_ATTRIBUTE_ENTITIES);
4860 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
4862 return(XML_ATTRIBUTE_NMTOKENS);
4863 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
4865 return(XML_ATTRIBUTE_NMTOKEN);
/* No keyword matched: the type is delegated to the enumerated-type parser. */
4867 return(xmlParseEnumeratedType(ctxt, tree));
4871 * xmlParseAttributeListDecl:
4872 * @ctxt: an XML parser context
4874 * parse the Attribute list def for an element
4876 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4878 * [53] AttDef ::= S Name S AttType S DefaultDecl
/*
 * Parses an '<!ATTLIST' declaration: the element name, then one attribute
 * definition (name, type, default) per loop iteration until '>' is seen.
 * Each definition is reported through the SAX attributeDecl callback when
 * one is installed and SAX is not disabled.
 */
4882 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4883 const xmlChar *elemName;
4884 const xmlChar *attrName;
4885 xmlEnumerationPtr tree;
4887 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
/* Input on entry, compared at the end to detect an entity boundary crossing. */
4888 xmlParserInputPtr input = ctxt->input;
4891 if (!IS_BLANK_CH(CUR)) {
4892 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4893 "Space required after '<!ATTLIST'\n");
4896 elemName = xmlParseName(ctxt);
4897 if (elemName == NULL) {
4898 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4899 "ATTLIST: no name for Element\n");
4904 while (RAW != '>') {
/* Cursor position at the start of the iteration, used as a progress check below. */
4905 const xmlChar *check = CUR_PTR;
4908 xmlChar *defaultValue = NULL;
4912 attrName = xmlParseName(ctxt);
4913 if (attrName == NULL) {
4914 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4915 "ATTLIST: no name for Attribute\n");
4919 if (!IS_BLANK_CH(CUR)) {
4920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4921 "Space required after the attribute name\n");
4926 type = xmlParseAttributeType(ctxt, &tree);
/* From here on the error paths release the enumeration tree (and the default value once parsed). */
4932 if (!IS_BLANK_CH(CUR)) {
4933 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4934 "Space required after the attribute type\n");
4936 xmlFreeEnumeration(tree);
4941 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4943 if (defaultValue != NULL)
4944 xmlFree(defaultValue);
4946 xmlFreeEnumeration(tree);
4952 if (!IS_BLANK_CH(CUR)) {
4953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4954 "Space required after the attribute default value\n");
4955 if (defaultValue != NULL)
4956 xmlFree(defaultValue);
4958 xmlFreeEnumeration(tree);
/* The cursor did not move during this iteration: reported as an internal error. */
4963 if (check == CUR_PTR) {
4964 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4965 "in xmlParseAttributeListDecl\n");
4966 if (defaultValue != NULL)
4967 xmlFree(defaultValue);
4969 xmlFreeEnumeration(tree);
/*
 * The tree is handed to the attributeDecl callback when one is called;
 * otherwise it is freed here. NOTE(review): presumably the callback takes
 * ownership of `tree` -- confirm against the SAX handler.
 */
4972 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4973 (ctxt->sax->attributeDecl != NULL))
4974 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4975 type, def, defaultValue, tree);
4976 else if (tree != NULL)
4977 xmlFreeEnumeration(tree);
/* In SAX2 mode, defaults other than #IMPLIED/#REQUIRED are recorded via xmlAddDefAttrs(). */
4979 if ((ctxt->sax2) && (defaultValue != NULL) &&
4980 (def != XML_ATTRIBUTE_IMPLIED) &&
4981 (def != XML_ATTRIBUTE_REQUIRED)) {
4982 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
/* In SAX2 mode, every non-CDATA attribute is recorded via xmlAddSpecialAttr(). */
4984 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4985 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4987 if (defaultValue != NULL)
4988 xmlFree(defaultValue);
4992 if (input != ctxt->input) {
4993 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4994 "Attribute list declaration doesn't start and stop in the same entity\n");
5002 * xmlParseElementMixedContentDecl:
5003 * @ctxt: an XML parser context
5004 * @inputchk: the input used for the current entity, needed for boundary checks
5006 * parse the declaration for a Mixed Element content
5007 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5009 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5010 * '(' S? '#PCDATA' S? ')'
5012 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5014 * [ VC: No Duplicate Types ]
5015 * The same name must not appear more than once in a single
5016 * mixed-content declaration.
5018 * returns: the list of the xmlElementContentPtr describing the element choices
/*
 * Builds the content tree of a Mixed declaration: a PCDATA node, combined
 * through XML_ELEMENT_CONTENT_OR nodes with one element node per name
 * found after a '|'. @inputchk is compared with the current input id to
 * report a declaration that does not start and stop in the same entity.
 */
5020 xmlElementContentPtr
5021 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5022 xmlElementContentPtr ret = NULL, cur = NULL, n;
5023 const xmlChar *elem = NULL;
5026 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
/* The entity-boundary mismatch is only reported when validating. */
5031 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5032 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5033 "Element content declaration doesn't start and stop in the same entity\n",
5037 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5039 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5044 if ((RAW == '(') || (RAW == '|')) {
5045 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5046 if (ret == NULL) return(NULL);
5048 while (RAW == '|') {
/*
 * NOTE(review): the two early returns below give up without releasing the
 * nodes already built (`cur`, respectively `ret`) -- confirm whether this
 * leaks on allocation failure.
 */
5051 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5052 if (ret == NULL) return(NULL);
5058 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5059 if (n == NULL) return(NULL);
5060 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5069 elem = xmlParseName(ctxt);
5071 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5072 "xmlParseElementMixedContentDecl : Name expected\n");
/*
 * NOTE(review): `cur` is freed here, not `ret`; if `cur` is an inner node
 * of the tree rooted at `ret`, the rest of that tree may be leaked with a
 * stale child pointer -- confirm.
 */
5073 xmlFreeDocElementContent(ctxt->myDoc, cur);
5079 if ((RAW == ')') && (NXT(1) == '*')) {
5081 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5082 XML_ELEMENT_CONTENT_ELEMENT);
5083 if (cur->c2 != NULL)
5084 cur->c2->parent = cur;
5086 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5087 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5088 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5089 "Element content declaration doesn't start and stop in the same entity\n",
5094 xmlFreeDocElementContent(ctxt->myDoc, ret);
5095 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5100 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5106 * xmlParseElementChildrenContentDecl:
5107 * @ctxt: an XML parser context
5108 * @inputchk: the input used for the current entity, needed for boundary checks
5110 * parse the declaration for an Element children content
5111 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5114 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5116 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5118 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5120 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5122 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5123 * TODO Parameter-entity replacement text must be properly nested
5124 * with parenthesized groups. That is to say, if either of the
5125 * opening or closing parentheses in a choice, seq, or Mixed
5126 * construct is contained in the replacement text for a parameter
5127 * entity, both must be contained in the same replacement text. For
5128 * interoperability, if a parameter-entity reference appears in a
5129 * choice, seq, or Mixed construct, its replacement text should not
5130 * be empty, and neither the first nor last non-blank character of
5131 * the replacement text should be a connector (| or ,).
5133 * Returns the tree of xmlElementContentPtr describing the element
/*
 * Builds the content tree for a parenthesised group of children. `ret` is
 * the root of the tree, `cur` the node currently being extended, `last`
 * the most recently parsed child and `op` the separator node
 * (XML_ELEMENT_CONTENT_SEQ or XML_ELEMENT_CONTENT_OR) being inserted.
 * @inputchk is compared with the current input id to report a group that
 * does not start and stop in the same entity.
 */
5136 xmlElementContentPtr
5137 xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
5138 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5139 const xmlChar *elem;
5145 int inputid = ctxt->input->id;
/*
 * NOTE(review): the recursive calls in this function carry no depth
 * argument; confirm that deeply nested groups cannot exhaust the stack.
 */
5147 /* Recurse on first child */
5150 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
5154 elem = xmlParseName(ctxt);
5156 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5159 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5161 xmlErrMemory(ctxt, NULL);
5166 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5168 } else if (RAW == '*') {
5169 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5171 } else if (RAW == '+') {
5172 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5175 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5181 while (RAW != ')') {
5183 * Each loop we parse one separator and one element.
5186 if (type == 0) type = CUR;
5189 * Detect "Name | Name , Name" error
5191 else if (type != CUR) {
/* The separator differs from the first one seen in this group: report it and free the partial tree. */
5192 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5193 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5195 if ((last != NULL) && (last != ret))
5196 xmlFreeDocElementContent(ctxt->myDoc, last);
5198 xmlFreeDocElementContent(ctxt->myDoc, ret);
5203 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5205 if ((last != NULL) && (last != ret))
5206 xmlFreeDocElementContent(ctxt->myDoc, last);
5207 xmlFreeDocElementContent(ctxt->myDoc, ret);
5225 } else if (RAW == '|') {
5226 if (type == 0) type = CUR;
5229 * Detect "Name , Name | Name" error
5231 else if (type != CUR) {
/* The separator differs from the first one seen in this group: report it and free the partial tree. */
5232 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5233 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5235 if ((last != NULL) && (last != ret))
5236 xmlFreeDocElementContent(ctxt->myDoc, last);
5238 xmlFreeDocElementContent(ctxt->myDoc, ret);
5243 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5245 if ((last != NULL) && (last != ret))
5246 xmlFreeDocElementContent(ctxt->myDoc, last);
5248 xmlFreeDocElementContent(ctxt->myDoc, ret);
5267 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5269 xmlFreeDocElementContent(ctxt->myDoc, ret);
5276 int inputid = ctxt->input->id;
5277 /* Recurse on second child */
5280 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
5283 elem = xmlParseName(ctxt);
5285 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5287 xmlFreeDocElementContent(ctxt->myDoc, ret);
5290 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5292 last->ocur = XML_ELEMENT_CONTENT_OPT;
5294 } else if (RAW == '*') {
5295 last->ocur = XML_ELEMENT_CONTENT_MULT;
5297 } else if (RAW == '+') {
5298 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5301 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5307 if ((cur != NULL) && (last != NULL)) {
/* The entity-boundary mismatch is only reported when validating. */
5312 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5313 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5314 "Element content declaration doesn't start and stop in the same entity\n",
5320 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5321 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5322 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5324 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5327 } else if (RAW == '*') {
5329 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5332 * Some normalization:
5333 * (a | b* | c?)* == (a | b | c)*
5335 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5336 if ((cur->c1 != NULL) &&
5337 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5338 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5339 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5340 if ((cur->c2 != NULL) &&
5341 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5342 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5343 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5348 } else if (RAW == '+') {
5352 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5353 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5354 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5356 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5358 * Some normalization:
5359 * (a | b*)+ == (a | b)*
5360 * (a | b?)+ == (a | b)*
5362 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5363 if ((cur->c1 != NULL) &&
5364 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5365 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5366 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5369 if ((cur->c2 != NULL) &&
5370 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5371 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5372 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5378 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5386 * xmlParseElementContentDecl:
5387 * @ctxt: an XML parser context
5388 * @name: the name of the element being defined.
5389 * @result: the Element Content pointer will be stored here if any
5391 * parse the declaration for an Element content either Mixed or Children,
5392 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5394 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5396 * returns: the type of element content XML_ELEMENT_TYPE_xxx
/*
 * Chooses between the Mixed and the children content parsers depending on
 * whether '#PCDATA' is found, and records the matching element type.
 */
5400 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
5401 xmlElementContentPtr *result) {
5403 xmlElementContentPtr tree = NULL;
/* Id of the input current on entry; the sub-parsers compare it with the input current when the group ends. */
5404 int inputid = ctxt->input->id;
5410 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5411 "xmlParseElementContentDecl : %s '(' expected\n", name);
5417 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5418 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
5419 res = XML_ELEMENT_TYPE_MIXED;
5421 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
5422 res = XML_ELEMENT_TYPE_ELEMENT;
5430 * xmlParseElementDecl:
5431 * @ctxt: an XML parser context
5433 * parse an Element declaration.
5435 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5437 * [ VC: Unique Element Type Declaration ]
5438 * No element type may be declared more than once
5440 * Returns the type of the element, or -1 in case of error
/*
 * Parses an '<!ELEMENT' declaration: the element name, then a content
 * specification that is 'EMPTY', 'ANY' or a parenthesised content model,
 * and reports the result through the SAX elementDecl callback.
 */
5443 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
5444 const xmlChar *name;
5446 xmlElementContentPtr content = NULL;
5448 /* GROW; done in the caller */
5449 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
/* Input on entry, compared at the end to detect an entity boundary crossing. */
5450 xmlParserInputPtr input = ctxt->input;
5453 if (!IS_BLANK_CH(CUR)) {
5454 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5455 "Space required after 'ELEMENT'\n");
5458 name = xmlParseName(ctxt);
5460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5461 "xmlParseElementDecl: no name for Element\n");
5464 while ((RAW == 0) && (ctxt->inputNr > 1))
5466 if (!IS_BLANK_CH(CUR)) {
5467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5468 "Space required after the element name\n");
5471 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
5474 * Element must always be empty.
5476 ret = XML_ELEMENT_TYPE_EMPTY;
5477 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5481 * Element is a generic container.
5483 ret = XML_ELEMENT_TYPE_ANY;
5484 } else if (RAW == '(') {
5485 ret = xmlParseElementContentDecl(ctxt, name, &content);
5488 * [ WFC: PEs in Internal Subset ] error handling.
5490 if ((RAW == '%') && (ctxt->external == 0) &&
5491 (ctxt->inputNr == 1)) {
5492 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
5493 "PEReference: forbidden within markup decl in internal subset\n");
5495 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5496 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5503 * Pop-up of finished entities.
5505 while ((RAW == 0) && (ctxt->inputNr > 1))
5510 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
5511 if (content != NULL) {
5512 xmlFreeDocElementContent(ctxt->myDoc, content);
5515 if (input != ctxt->input) {
5516 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5517 "Element declaration doesn't start and stop in the same entity\n");
/*
 * The parent pointer of the content tree is cleared before the callback;
 * if it is still NULL afterwards the tree is freed here, otherwise it is
 * left alone (see the note on xmlAddElementDecl further down).
 */
5521 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5522 (ctxt->sax->elementDecl != NULL)) {
5523 if (content != NULL)
5524 content->parent = NULL;
5525 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5527 if ((content != NULL) && (content->parent == NULL)) {
5529 * this is a trick: if xmlAddElementDecl is called,
5530 * instead of copying the full tree it is plugged directly
5531 * if called from the parser. Avoid duplicating the
5532 * interfaces or change the API/ABI
5534 xmlFreeDocElementContent(ctxt->myDoc, content);
5536 } else if (content != NULL) {
5537 xmlFreeDocElementContent(ctxt->myDoc, content);
5545 * xmlParseConditionalSections
5546 * @ctxt: an XML parser context
5548 * [61] conditionalSect ::= includeSect | ignoreSect
5549 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5550 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5551 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5552 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
/*
 * Handles one conditional section. For 'INCLUDE' the content is parsed as
 * nested conditional sections, blanks, PE references and markup
 * declarations; for 'IGNORE' the content is skipped while nesting is
 * tracked in `depth`. Any other keyword is a fatal error.
 */
5556 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5559 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
5563 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
/* Optional trace output, enabled by the xmlParserDebugEntities global. */
5567 if (xmlParserDebugEntities) {
5568 if ((ctxt->input != NULL) && (ctxt->input->filename))
5569 xmlGenericError(xmlGenericErrorContext,
5570 "%s(%d): ", ctxt->input->filename,
5572 xmlGenericError(xmlGenericErrorContext,
5573 "Entering INCLUDE Conditional Section\n");
5576 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5578 const xmlChar *check = CUR_PTR;
5579 unsigned int cons = ctxt->input->consumed;
5581 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5582 xmlParseConditionalSections(ctxt);
5583 } else if (IS_BLANK_CH(CUR)) {
5585 } else if (RAW == '%') {
5586 xmlParsePEReference(ctxt);
5588 xmlParseMarkupDecl(ctxt);
5591 * Pop-up of finished entities.
5593 while ((RAW == 0) && (ctxt->inputNr > 1))
5596 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
/* Neither the cursor nor the consumed count changed during this iteration. */
5597 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5601 if (xmlParserDebugEntities) {
5602 if ((ctxt->input != NULL) && (ctxt->input->filename))
5603 xmlGenericError(xmlGenericErrorContext,
5604 "%s(%d): ", ctxt->input->filename,
5606 xmlGenericError(xmlGenericErrorContext,
5607 "Leaving INCLUDE Conditional Section\n");
5610 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
5612 xmlParserInputState instate;
5618 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5622 if (xmlParserDebugEntities) {
5623 if ((ctxt->input != NULL) && (ctxt->input->filename))
5624 xmlGenericError(xmlGenericErrorContext,
5625 "%s(%d): ", ctxt->input->filename,
5627 xmlGenericError(xmlGenericErrorContext,
5628 "Entering IGNORE Conditional Section\n");
5632 * Parse up to the end of the conditional section
5633 * But disable SAX event generating DTD building in the meantime
5635 state = ctxt->disableSAX;
5636 instate = ctxt->instate;
/* SAX is only disabled when the parser is not in recovery mode. */
5637 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5638 ctxt->instate = XML_PARSER_IGNORE;
5640 while ((depth >= 0) && (RAW != 0)) {
5641 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5646 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
/* A ']]>' closes one level; it is skipped here only while depth stays non-negative. */
5647 if (--depth >= 0) SKIP(3);
/* Restore the SAX flag and parser state saved before the skip loop. */
5654 ctxt->disableSAX = state;
5655 ctxt->instate = instate;
5657 if (xmlParserDebugEntities) {
5658 if ((ctxt->input != NULL) && (ctxt->input->filename))
5659 xmlGenericError(xmlGenericErrorContext,
5660 "%s(%d): ", ctxt->input->filename,
5662 xmlGenericError(xmlGenericErrorContext,
5663 "Leaving IGNORE Conditional Section\n");
5667 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
5674 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
5681 * xmlParseMarkupDecl:
5682 * @ctxt: an XML parser context
5684 * parse Markup declarations
5686 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5687 * NotationDecl | PI | Comment
5689 * [ VC: Proper Declaration/PE Nesting ]
5690 * Parameter-entity replacement text must be properly nested with
5691 * markup declarations. That is to say, if either the first character
5692 * or the last character of a markup declaration (markupdecl above) is
5693 * contained in the replacement text for a parameter-entity reference,
5694 * both must be contained in the same replacement text.
5696 * [ WFC: PEs in Internal Subset ]
5697 * In the internal DTD subset, parameter-entity references can occur
5698 * only where markup declarations can occur, not within markup declarations.
5699 * (This does not apply to references that occur in external parameter
5700 * entities or to the external subset.)
/*
 * Dispatches on the characters following '<': after '<!' one of the
 * element, entity, attribute-list, notation or comment parsers is called;
 * '<?' is handled in a separate branch. PE references and conditional
 * sections are then handled depending on ctxt->external and ctxt->inputNr.
 */
5703 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5706 if (NXT(1) == '!') {
5710 xmlParseElementDecl(ctxt);
5711 else if (NXT(3) == 'N')
5712 xmlParseEntityDecl(ctxt);
5715 xmlParseAttributeListDecl(ctxt);
5718 xmlParseNotationDecl(ctxt);
5721 xmlParseComment(ctxt);
5724 /* there is an error but it will be detected later */
5727 } else if (NXT(1) == '?') {
5732 * This is only for internal subset. On external entities,
5733 * the replacement is done before parsing stage
5735 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5736 xmlParsePEReference(ctxt);
5739 * Conditional sections are allowed from entities included
5740 * by PE References in the internal subset.
5742 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5743 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5744 xmlParseConditionalSections(ctxt);
5748 ctxt->instate = XML_PARSER_DTD;
5753 * @ctxt: an XML parser context
5755 * parse an XML declaration header for external entities
5757 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5759 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
/*
 * Parses the text declaration of an external entity: the '<?xml' opener,
 * an optional version (defaulted when absent), the encoding declaration
 * and the closing '?>'.
 */
5763 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5765 const xmlChar *encoding;
5768 * We know that '<?xml' is here.
5770 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
5773 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
5777 if (!IS_BLANK_CH(CUR)) {
5778 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5779 "Space needed after '<?xml'\n");
5784 * We may have the VersionInfo here.
5786 version = xmlParseVersionInfo(ctxt);
5787 if (version == NULL)
5788 version = xmlCharStrdup(XML_DEFAULT_VERSION);
5790 if (!IS_BLANK_CH(CUR)) {
5791 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5792 "Space needed here\n");
/* The version string, parsed or defaulted, is stored on the current input. */
5795 ctxt->input->version = version;
5798 * We must have the encoding declaration
5800 encoding = xmlParseEncodingDecl(ctxt);
5801 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5803 * The XML REC instructs us to stop parsing right here
5807 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
/* The missing encoding is only reported when no other error is recorded in ctxt->errNo. */
5808 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5809 "Missing encoding in text declaration\n");
5813 if ((RAW == '?') && (NXT(1) == '>')) {
5815 } else if (RAW == '>') {
5816 /* Deprecated old WD ... */
5817 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
5820 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
/* NOTE(review): MOVETO_ENDTAG presumably advances the cursor to the next '>' -- confirm against the macro. */
5821 MOVETO_ENDTAG(CUR_PTR);
5827 * xmlParseExternalSubset:
5828 * @ctxt: an XML parser context
5829 * @ExternalID: the external identifier
5830 * @SystemID: the system identifier (or URL)
5832 * parse Markup declarations from an external subset
5834 * [30] extSubset ::= textDecl? extSubsetDecl
5836 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
/*
 * Parses an external DTD subset: an optional text declaration, then a loop
 * over conditional sections, blanks, PE references and markup declarations
 * for as long as the input starts with '<?', '<!', '%' or a blank.
 */
5839 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5840 const xmlChar *SystemID) {
5841 xmlDetectSAX2(ctxt);
5843 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
5844 xmlParseTextDecl(ctxt);
5845 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5847 * The XML REC instructs us to stop parsing right here
5849 ctxt->instate = XML_PARSER_EOF;
/* A "1.0" document is created when the context has none yet. */
5853 if (ctxt->myDoc == NULL) {
5854 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
/* An internal subset carrying the given identifiers is created when the document has none. */
5856 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5857 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5859 ctxt->instate = XML_PARSER_DTD;
5861 while (((RAW == '<') && (NXT(1) == '?')) ||
5862 ((RAW == '<') && (NXT(1) == '!')) ||
5863 (RAW == '%') || IS_BLANK_CH(CUR)) {
/* Cursor and consumed count at the start of the iteration, used as a progress check below. */
5864 const xmlChar *check = CUR_PTR;
5865 unsigned int cons = ctxt->input->consumed;
5868 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5869 xmlParseConditionalSections(ctxt);
5870 } else if (IS_BLANK_CH(CUR)) {
5872 } else if (RAW == '%') {
5873 xmlParsePEReference(ctxt);
5875 xmlParseMarkupDecl(ctxt);
5878 * Pop-up of finished entities.
5880 while ((RAW == 0) && (ctxt->inputNr > 1))
5883 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
/* Neither the cursor nor the consumed count changed during this iteration. */
5884 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5890 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5896 * xmlParseReference:
5897 * @ctxt: an XML parser context
5899 * parse and handle entity references in content, depending on the SAX
5900 * interface, this may end-up in a call to character() if this is a
5901 * CharRef, a predefined entity, if there is no reference() callback.
5902 * or if the parser was asked to switch to that mode.
5904 * [67] Reference ::= EntityRef | CharRef
5907 xmlParseReference(xmlParserCtxtPtr ctxt) {
5910 if (RAW != '&') return;
5912 if (NXT(1) == '#') {
5916 int value = xmlParseCharRef(ctxt);
5918 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5920 * So we are using non-UTF-8 buffers
5921 * Check that the char fits in 8 bits, if not
5922 * generate a CharRef.
5924 if (value <= 0xFF) {
5927 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5928 (!ctxt->disableSAX))
5929 ctxt->sax->characters(ctxt->userData, out, 1);
5931 if ((hex == 'x') || (hex == 'X'))
5932 snprintf((char *)out, sizeof(out), "#x%X", value);
5934 snprintf((char *)out, sizeof(out), "#%d", value);
5935 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5936 (!ctxt->disableSAX))
5937 ctxt->sax->reference(ctxt->userData, out);
5941 * Just encode the value in UTF-8
5943 COPY_BUF(0 ,out, i, value);
5945 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5946 (!ctxt->disableSAX))
5947 ctxt->sax->characters(ctxt->userData, out, i);
5952 ent = xmlParseEntityRef(ctxt);
5953 if (ent == NULL) return;
5954 if (!ctxt->wellFormed)
5956 was_checked = ent->checked;
5957 if ((ent->name != NULL) &&
5958 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5959 xmlNodePtr list = NULL;
5960 xmlParserErrors ret = XML_ERR_OK;
5964 * The first reference to the entity trigger a parsing phase
5965 * where the ent->children is filled with the result from
5968 if (ent->checked == 0) {
5971 value = ent->content;
5974 * Check that this entity is well formed
5976 if ((value != NULL) && (value[0] != 0) &&
5977 (value[1] == 0) && (value[0] == '<') &&
5978 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5980 * DONE: get definite answer on this !!!
5981 * Lots of entity decls are used to declare a single
5984 * Which seems to be valid since
5985 * 2.4: The ampersand character (&) and the left angle
5986 * bracket (<) may appear in their literal form only
5987 * when used ... They are also legal within the literal
5988 * entity value of an internal entity declaration;i
5989 * see "4.3.2 Well-Formed Parsed Entities".
5990 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5991 * Looking at the OASIS test suite and James Clark
5992 * tests, this is broken. However the XML REC uses
5993 * it. Is the XML REC not well-formed ????
5994 * This is a hack to avoid this problem
5996 * ANSWER: since lt gt amp .. are already defined,
5997 * this is a redefinition and hence the fact that the
5998 * content is not well balanced is not a Wf error, this
5999 * is lousy but acceptable.
6001 list = xmlNewDocText(ctxt->myDoc, value);
6003 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6004 (ent->children == NULL)) {
6005 ent->children = list;
6008 list->parent = (xmlNodePtr) ent;
6010 xmlFreeNodeList(list);
6012 } else if (list != NULL) {
6013 xmlFreeNodeList(list);
6017 * 4.3.2: An internal general parsed entity is well-formed
6018 * if its replacement text matches the production labeled
6024 * This is a bit hackish but this seems the best
6025 * way to make sure both SAX and DOM entity support
6028 if (ctxt->userData == ctxt)
6031 user_data = ctxt->userData;
6033 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6035 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6036 value, user_data, &list);
6038 } else if (ent->etype ==
6039 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6041 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6042 ctxt->sax, user_data, ctxt->depth,
6043 ent->URI, ent->ExternalID, &list);
6046 ret = XML_ERR_ENTITY_PE_INTERNAL;
6047 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6048 "invalid entity type found\n", NULL);
6050 if (ret == XML_ERR_ENTITY_LOOP) {
6051 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6053 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
6054 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6055 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6056 (ent->children == NULL)) {
6057 ent->children = list;
6058 if (ctxt->replaceEntities) {
6060 * Prune it directly in the generated document
6061 * except for single text nodes.
6063 if (((list->type == XML_TEXT_NODE) &&
6064 (list->next == NULL)) ||
6065 (ctxt->parseMode == XML_PARSE_READER)) {
6066 list->parent = (xmlNodePtr) ent;
6071 while (list != NULL) {
6072 list->parent = (xmlNodePtr) ctxt->node;
6073 list->doc = ctxt->myDoc;
6074 if (list->next == NULL)
6078 list = ent->children;
6079 #ifdef LIBXML_LEGACY_ENABLED
6080 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6081 xmlAddEntityReference(ent, list, NULL);
6082 #endif /* LIBXML_LEGACY_ENABLED */
6086 while (list != NULL) {
6087 list->parent = (xmlNodePtr) ent;
6088 if (list->next == NULL)
6094 xmlFreeNodeList(list);
6097 } else if ((ret != XML_ERR_OK) &&
6098 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6099 xmlFatalErr(ctxt, ret, NULL);
6100 } else if (list != NULL) {
6101 xmlFreeNodeList(list);
6108 if (ent->children == NULL) {
6110 * Probably running in SAX mode and the callbacks don't
6111 * build the entity content. So unless we already went
6112 * though parsing for first checking go though the entity
6113 * content to generate callbacks associated to the entity
6115 if (was_checked == 1) {
6118 * This is a bit hackish but this seems the best
6119 * way to make sure both SAX and DOM entity support
6122 if (ctxt->userData == ctxt)
6125 user_data = ctxt->userData;
6127 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6129 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6130 ent->content, user_data, NULL);
6132 } else if (ent->etype ==
6133 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6135 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6136 ctxt->sax, user_data, ctxt->depth,
6137 ent->URI, ent->ExternalID, NULL);
6140 ret = XML_ERR_ENTITY_PE_INTERNAL;
6141 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6142 "invalid entity type found\n", NULL);
6144 if (ret == XML_ERR_ENTITY_LOOP) {
6145 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6149 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6150 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6152 * Entity reference callback comes second, it's somewhat
6153 * superfluous but a compatibility to historical behaviour
6155 ctxt->sax->reference(ctxt->userData, ent->name);
6159 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6160 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6164 ctxt->sax->reference(ctxt->userData, ent->name);
6167 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6169 * There is a problem on the handling of _private for entities
6170 * (bug 155816): Should we copy the content of the field from
6171 * the entity (possibly overwriting some value set by the user
6172 * when a copy is created), should we leave it alone, or should
6173 * we try to take care of different situations? The problem
6174 * is exacerbated by the usage of this field by the xmlReader.
6175 * To fix this bug, we look at _private on the created node
6176 * and, if it's NULL, we copy in whatever was in the entity.
6177 * If it's not NULL we leave it alone. This is somewhat of a
6178 * hack - maybe we should have further tests to determine
6181 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6183 * Seems we are generating the DOM content, do
6184 * a simple tree copy for all references except the first
6185 * In the first occurrence list contains the replacement.
6186 * progressive == 2 means we are operating on the Reader
6187 * and since nodes are discarded we must copy all the time.
6189 if (((list == NULL) && (ent->owner == 0)) ||
6190 (ctxt->parseMode == XML_PARSE_READER)) {
6191 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6194 * when operating on a reader, the entities definitions
6195 * are always owning the entities subtree.
6196 if (ctxt->parseMode == XML_PARSE_READER)
6200 cur = ent->children;
6201 while (cur != NULL) {
6202 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6204 if (nw->_private == NULL)
6205 nw->_private = cur->_private;
6206 if (firstChild == NULL){
6209 nw = xmlAddChild(ctxt->node, nw);
6211 if (cur == ent->last) {
6213 * needed to detect some strange empty
6214 * node cases in the reader tests
6216 if ((ctxt->parseMode == XML_PARSE_READER) &&
6218 (nw->type == XML_ELEMENT_NODE) &&
6219 (nw->children == NULL))
6226 #ifdef LIBXML_LEGACY_ENABLED
6227 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6228 xmlAddEntityReference(ent, firstChild, nw);
6229 #endif /* LIBXML_LEGACY_ENABLED */
6230 } else if (list == NULL) {
6231 xmlNodePtr nw = NULL, cur, next, last,
6234 * Copy the entity child list and make it the new
6235 * entity child list. The goal is to make sure any
6236 * ID or REF referenced will be the one from the
6237 * document content and not the entity copy.
6239 cur = ent->children;
6240 ent->children = NULL;
6243 while (cur != NULL) {
6247 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6249 if (nw->_private == NULL)
6250 nw->_private = cur->_private;
6251 if (firstChild == NULL){
6254 xmlAddChild((xmlNodePtr) ent, nw);
6255 xmlAddChild(ctxt->node, cur);
6262 #ifdef LIBXML_LEGACY_ENABLED
6263 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6264 xmlAddEntityReference(ent, firstChild, nw);
6265 #endif /* LIBXML_LEGACY_ENABLED */
6267 const xmlChar *nbktext;
6270 * the name change is to avoid coalescing of the
6271 * node with a possible previous text one which
6272 * would make ent->children a dangling pointer
6274 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6276 if (ent->children->type == XML_TEXT_NODE)
6277 ent->children->name = nbktext;
6278 if ((ent->last != ent->children) &&
6279 (ent->last->type == XML_TEXT_NODE))
6280 ent->last->name = nbktext;
6281 xmlAddChildList(ctxt->node, ent->children);
6285 * This is to avoid a nasty side effect, see
6286 * characters() in SAX.c
6295 if (val == NULL) return;
6297 * inline the entity.
6299 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6300 (!ctxt->disableSAX))
6301 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6307 * xmlParseEntityRef:
6308 * @ctxt: an XML parser context
6310 * parse an ENTITY reference
6312 * [68] EntityRef ::= '&' Name ';'
6314 * [ WFC: Entity Declared ]
6315 * In a document without any DTD, a document with only an internal DTD
6316 * subset which contains no parameter entity references, or a document
6317 * with "standalone='yes'", the Name given in the entity reference
6318 * must match that in an entity declaration, except that well-formed
6319 * documents need not declare any of the following entities: amp, lt,
6320 * gt, apos, quot. The declaration of a parameter entity must precede
6321 * any reference to it. Similarly, the declaration of a general entity
6322 * must precede any reference to it which appears in a default value in an
6323 * attribute-list declaration. Note that if entities are declared in the
6324 * external subset or in external parameter entities, a non-validating
6325 * processor is not obligated to read and process their declarations;
6326 * for such documents, the rule that an entity must be declared is a
6327 * well-formedness constraint only if standalone='yes'.
6329 * [ WFC: Parsed Entity ]
6330 * An entity reference must not contain the name of an unparsed entity
6332 * Returns the xmlEntityPtr if found, or NULL otherwise.
/*
 * xmlParseEntityRef: parse the Name of an entity reference from the
 * input and look up the matching entity.
 * Lookup order visible here: the SAX getEntity() callback, then
 * xmlGetPredefinedEntity(), then xmlSAX2GetEntity() when ctxt->userData
 * is the parser context itself. The entity found (or its absence) is
 * then run through the well-formedness checks below.
 */
6335 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
6336 const xmlChar *name;
6337 xmlEntityPtr ent = NULL;
6343 name = xmlParseName(ctxt);
6345 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6346 "xmlParseEntityRef: no name\n");
6351 * Ask first SAX for entity resolution, otherwise try the
6354 if (ctxt->sax != NULL) {
6355 if (ctxt->sax->getEntity != NULL)
6356 ent = ctxt->sax->getEntity(ctxt->userData, name);
/* Both fallbacks are only tried while ctxt->wellFormed is still 1. */
6357 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
6358 ent = xmlGetPredefinedEntity(name);
6359 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6360 (ctxt->userData==ctxt)) {
6361 ent = xmlSAX2GetEntity(ctxt, name);
6365 * [ WFC: Entity Declared ]
6366 * In a document without any DTD, a document with only an
6367 * internal DTD subset which contains no parameter entity
6368 * references, or a document with "standalone='yes'", the
6369 * Name given in the entity reference must match that in an
6370 * entity declaration, except that well-formed documents
6371 * need not declare any of the following entities: amp, lt,
6373 * The declaration of a parameter entity must precede any
6375 * Similarly, the declaration of a general entity must
6376 * precede any reference to it which appears in a default
6377 * value in an attribute-list declaration. Note that if
6378 * entities are declared in the external subset or in
6379 * external parameter entities, a non-validating processor
6380 * is not obligated to read and process their declarations;
6381 * for such documents, the rule that an entity must be
6382 * declared is a well-formedness constraint only if
/*
 * Undeclared entity: fatal when standalone, or when there is neither an
 * external subset nor any PE reference; the XML_WAR_UNDECLARED_ENTITY
 * report that follows covers the remaining case.
 */
6386 if ((ctxt->standalone == 1) ||
6387 ((ctxt->hasExternalSubset == 0) &&
6388 (ctxt->hasPErefs == 0))) {
6389 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6390 "Entity '%s' not defined\n", name);
6392 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6393 "Entity '%s' not defined\n", name);
/* When ctxt->inSubset is 0, the bare name is still passed to the SAX reference() callback. */
6394 if ((ctxt->inSubset == 0) &&
6395 (ctxt->sax != NULL) &&
6396 (ctxt->sax->reference != NULL)) {
6397 ctxt->sax->reference(ctxt->userData, name);
6404 * [ WFC: Parsed Entity ]
6405 * An entity reference must not contain the name of an
6408 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6409 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6410 "Entity reference to unparsed entity %s\n", name);
6414 * [ WFC: No External Entity References ]
6415 * Attribute values cannot contain direct or indirect
6416 * entity references to external entities.
6418 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6419 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6420 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6421 "Attribute references external entity '%s'\n", name);
6424 * [ WFC: No < in Attribute Values ]
6425 * The replacement text of any entity referred to directly or
6426 * indirectly in an attribute value (other than "<") must
/* The entity named "lt" is exempt from the '<' scan of the content. */
6429 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6431 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6432 (ent->content != NULL) &&
6433 (xmlStrchr(ent->content, '<'))) {
6434 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6435 "'<' in entity '%s' is not allowed in attributes values\n", name);
6439 * Internal check, no parameter entities here ...
6442 switch (ent->etype) {
6443 case XML_INTERNAL_PARAMETER_ENTITY:
6444 case XML_EXTERNAL_PARAMETER_ENTITY:
6445 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6446 "Attempt to reference the parameter entity '%s'\n",
6455 * [ WFC: No Recursion ]
6456 * A parsed entity must not contain a recursive reference
6457 * to itself, either directly or indirectly.
6458 * Done somewhere else
/* Reported when the reference is not terminated by ';'. */
6462 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6470 * xmlParseStringEntityRef:
6471 * @ctxt: an XML parser context
6472 * @str: a pointer to an index in the string
6474 * parse an ENTITY reference, but this version parses it from
6477 * [68] EntityRef ::= '&' Name ';'
6479 * [ WFC: Entity Declared ]
6480 * In a document without any DTD, a document with only an internal DTD
6481 * subset which contains no parameter entity references, or a document
6482 * with "standalone='yes'", the Name given in the entity reference
6483 * must match that in an entity declaration, except that well-formed
6484 * documents need not declare any of the following entities: amp, lt,
6485 * gt, apos, quot. The declaration of a parameter entity must precede
6486 * any reference to it. Similarly, the declaration of a general entity
6487 * must precede any reference to it which appears in a default value in an
6488 * attribute-list declaration. Note that if entities are declared in the
6489 * external subset or in external parameter entities, a non-validating
6490 * processor is not obligated to read and process their declarations;
6491 * for such documents, the rule that an entity must be declared is a
6492 * well-formedness constraint only if standalone='yes'.
6494 * [ WFC: Parsed Entity ]
6495 * An entity reference must not contain the name of an unparsed entity
6497 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6498 * is updated to the current location in the string.
/*
 * xmlParseStringEntityRef: string-based counterpart of the entity
 * reference parser. The Name is read from *str with
 * xmlParseStringName() rather than from the parser input.
 * Both str and *str are tested against NULL before anything else.
 * The entity is looked up through the SAX getEntity() callback,
 * xmlGetPredefinedEntity() and xmlSAX2GetEntity(), then checked against
 * the well-formedness constraints below.
 */
6501 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6505 xmlEntityPtr ent = NULL;
6507 if ((str == NULL) || (*str == NULL))
6514 name = xmlParseStringName(ctxt, &ptr);
6516 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6517 "xmlParseStringEntityRef: no name\n");
6522 * Ask first SAX for entity resolution, otherwise try the
6525 if (ctxt->sax != NULL) {
6526 if (ctxt->sax->getEntity != NULL)
6527 ent = ctxt->sax->getEntity(ctxt->userData, name);
6529 ent = xmlGetPredefinedEntity(name);
/* Last resort, only when the user data is the parser context itself. */
6530 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6531 ent = xmlSAX2GetEntity(ctxt, name);
6535 * [ WFC: Entity Declared ]
6536 * In a document without any DTD, a document with only an
6537 * internal DTD subset which contains no parameter entity
6538 * references, or a document with "standalone='yes'", the
6539 * Name given in the entity reference must match that in an
6540 * entity declaration, except that well-formed documents
6541 * need not declare any of the following entities: amp, lt,
6543 * The declaration of a parameter entity must precede any
6545 * Similarly, the declaration of a general entity must
6546 * precede any reference to it which appears in a default
6547 * value in an attribute-list declaration. Note that if
6548 * entities are declared in the external subset or in
6549 * external parameter entities, a non-validating processor
6550 * is not obligated to read and process their declarations;
6551 * for such documents, the rule that an entity must be
6552 * declared is a well-formedness constraint only if
/*
 * Undeclared entity: fatal when standalone, or when there is neither an
 * external subset nor any PE reference; the XML_WAR_UNDECLARED_ENTITY
 * report that follows covers the remaining case.
 */
6556 if ((ctxt->standalone == 1) ||
6557 ((ctxt->hasExternalSubset == 0) &&
6558 (ctxt->hasPErefs == 0))) {
6559 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6560 "Entity '%s' not defined\n", name);
6562 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6563 "Entity '%s' not defined\n",
6566 /* TODO ? check regressions ctxt->valid = 0; */
6570 * [ WFC: Parsed Entity ]
6571 * An entity reference must not contain the name of an
6574 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6575 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6576 "Entity reference to unparsed entity %s\n", name);
6580 * [ WFC: No External Entity References ]
6581 * Attribute values cannot contain direct or indirect
6582 * entity references to external entities.
6584 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6585 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6586 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6587 "Attribute references external entity '%s'\n", name);
6590 * [ WFC: No < in Attribute Values ]
6591 * The replacement text of any entity referred to directly or
6592 * indirectly in an attribute value (other than "<") must
/* The entity named "lt" is exempt from the '<' scan of the content. */
6595 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6597 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6598 (ent->content != NULL) &&
6599 (xmlStrchr(ent->content, '<'))) {
6600 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6601 "'<' in entity '%s' is not allowed in attributes values\n",
6606 * Internal check, no parameter entities here ...
6609 switch (ent->etype) {
6610 case XML_INTERNAL_PARAMETER_ENTITY:
6611 case XML_EXTERNAL_PARAMETER_ENTITY:
6612 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6613 "Attempt to reference the parameter entity '%s'\n",
6622 * [ WFC: No Recursion ]
6623 * A parsed entity must not contain a recursive reference
6624 * to itself, either directly or indirectly.
6625 * Done somewhere else
/* Reported when the reference is not terminated by ';'. */
6629 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6639 * xmlParsePEReference:
6640 * @ctxt: an XML parser context
6642 * parse PEReference declarations
6643 * The entity content is handled directly by pushing its content as
6644 * a new input stream.
6646 * [69] PEReference ::= '%' Name ';'
6648 * [ WFC: No Recursion ]
6649 * A parsed entity must not contain a recursive
6650 * reference to itself, either directly or indirectly.
6652 * [ WFC: Entity Declared ]
6653 * In a document without any DTD, a document with only an internal DTD
6654 * subset which contains no parameter entity references, or a document
6655 * with "standalone='yes'", ... ... The declaration of a parameter
6656 * entity must precede any reference to it...
6658 * [ VC: Entity Declared ]
6659 * In a document with an external subset or external parameter entities
6660 * with "standalone='no'", ... ... The declaration of a parameter entity
6661 * must precede any reference to it...
6664 * Parameter-entity references may only appear in the DTD.
6665 * NOTE: misleading but this is handled.
/*
 * xmlParsePEReference: parse a parameter-entity reference from the
 * input. The entity is looked up through the SAX getParameterEntity()
 * callback; when found and of a parameter-entity type, its content is
 * pushed as a new parser input with xmlPushInput().
 * ctxt->hasPErefs is set to 1 in this function.
 */
6668 xmlParsePEReference(xmlParserCtxtPtr ctxt)
6670 const xmlChar *name;
6671 xmlEntityPtr entity = NULL;
6672 xmlParserInputPtr input;
6676 name = xmlParseName(ctxt);
6678 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6679 "xmlParsePEReference: no name\n");
6683 if ((ctxt->sax != NULL) &&
6684 (ctxt->sax->getParameterEntity != NULL))
6685 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6687 if (entity == NULL) {
6689 * [ WFC: Entity Declared ]
6690 * In a document without any DTD, a document with only an
6691 * internal DTD subset which contains no parameter entity
6692 * references, or a document with "standalone='yes'", ...
6693 * ... The declaration of a parameter entity must precede
6694 * any reference to it...
/* Fatal when standalone, or when no external subset and no earlier PE reference exist. */
6696 if ((ctxt->standalone == 1) ||
6697 ((ctxt->hasExternalSubset == 0) &&
6698 (ctxt->hasPErefs == 0))) {
6699 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6700 "PEReference: %%%s; not found\n",
6704 * [ VC: Entity Declared ]
6705 * In a document with an external subset or external
6706 * parameter entities with "standalone='no'", ...
6707 * ... The declaration of a parameter entity must
6708 * precede any reference to it...
6710 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6711 "PEReference: %%%s; not found\n",
6717 * Internal checking in case the entity quest barfed
6719 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6720 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6721 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6722 "Internal: %%%s; is not a parameter entity\n",
/* Current input is not itself a blanks wrapper: push a blanks-wrapper stream for the entity. */
6724 } else if (ctxt->input->free != deallocblankswrapper) {
6726 xmlNewBlanksWrapperInputStream(ctxt, entity);
6727 xmlPushInput(ctxt, input);
6731 * handle the extra spaces added before and after
6732 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6734 input = xmlNewEntityInputStream(ctxt, entity);
6735 xmlPushInput(ctxt, input);
/* An external parameter entity starting with "<?xml" plus a blank carries a text declaration. */
6736 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6737 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6738 (IS_BLANK_CH(NXT(5)))) {
6739 xmlParseTextDecl(ctxt);
6741 XML_ERR_UNSUPPORTED_ENCODING) {
6743 * The XML REC instructs us to stop parsing
6746 ctxt->instate = XML_PARSER_EOF;
/* This flag is read by the undeclared-entity checks (see the hasPErefs test above). */
6752 ctxt->hasPErefs = 1;
/* Reported when the reference is not terminated by ';'. */
6754 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6761 * xmlParseStringPEReference:
6762 * @ctxt: an XML parser context
6763 * @str: a pointer to an index in the string
6765 * parse PEReference declarations
6767 * [69] PEReference ::= '%' Name ';'
6769 * [ WFC: No Recursion ]
6770 * A parsed entity must not contain a recursive
6771 * reference to itself, either directly or indirectly.
6773 * [ WFC: Entity Declared ]
6774 * In a document without any DTD, a document with only an internal DTD
6775 * subset which contains no parameter entity references, or a document
6776 * with "standalone='yes'", ... ... The declaration of a parameter
6777 * entity must precede any reference to it...
6779 * [ VC: Entity Declared ]
6780 * In a document with an external subset or external parameter entities
6781 * with "standalone='no'", ... ... The declaration of a parameter entity
6782 * must precede any reference to it...
6785 * Parameter-entity references may only appear in the DTD.
6786 * NOTE: misleading but this is handled.
6788 * Returns the string of the entity content.
6789 * str is updated to the current value of the index
/*
 * xmlParseStringPEReference: string-based counterpart of the
 * parameter-entity reference parser. The Name is read from *str with
 * xmlParseStringName().
 * Returns NULL at once when str or *str is NULL. The entity is looked
 * up through the SAX getParameterEntity() callback; the visible code
 * only checks the result and sets ctxt->hasPErefs, it pushes no input.
 */
6792 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6796 xmlEntityPtr entity = NULL;
6798 if ((str == NULL) || (*str == NULL)) return(NULL);
6804 name = xmlParseStringName(ctxt, &ptr);
6806 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6807 "xmlParseStringPEReference: no name\n");
6813 if ((ctxt->sax != NULL) &&
6814 (ctxt->sax->getParameterEntity != NULL))
6815 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6817 if (entity == NULL) {
6819 * [ WFC: Entity Declared ]
6820 * In a document without any DTD, a document with only an
6821 * internal DTD subset which contains no parameter entity
6822 * references, or a document with "standalone='yes'", ...
6823 * ... The declaration of a parameter entity must precede
6824 * any reference to it...
/* Fatal when standalone, or when no external subset and no earlier PE reference exist. */
6826 if ((ctxt->standalone == 1) ||
6827 ((ctxt->hasExternalSubset == 0) &&
6828 (ctxt->hasPErefs == 0))) {
6829 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6830 "PEReference: %%%s; not found\n", name);
6833 * [ VC: Entity Declared ]
6834 * In a document with an external subset or external
6835 * parameter entities with "standalone='no'", ...
6836 * ... The declaration of a parameter entity must
6837 * precede any reference to it...
6839 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6840 "PEReference: %%%s; not found\n",
6846 * Internal checking in case the entity quest barfed
/* An entity that is not of a parameter-entity type only produces a warning. */
6848 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6849 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6850 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6851 "%%%s; is not a parameter entity\n",
6855 ctxt->hasPErefs = 1;
/* Reported when the reference is not terminated by ';'. */
6857 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6867 * xmlParseDocTypeDecl:
6868 * @ctxt: an XML parser context
6870 * parse a DOCTYPE declaration
6872 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6873 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6875 * [ VC: Root Element Type ]
6876 * The Name in the document type declaration must match the element
6877 * type of the root element.
/*
 * xmlParseDocTypeDecl: parse a DOCTYPE declaration.
 * Reads the DOCTYPE name and the optional external identifier, stores
 * them in the context (intSubName, extSubURI, extSubSystem,
 * hasExternalSubset) and notifies the SAX internalSubset() callback.
 * Declarations of the internal subset itself are parsed separately by
 * xmlParseInternalSubset().
 */
6881 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6882 const xmlChar *name = NULL;
6883 xmlChar *ExternalID = NULL;
6884 xmlChar *URI = NULL;
6887 * We know that '<!DOCTYPE' has been detected.
6894 * Parse the DOCTYPE name.
6896 name = xmlParseName(ctxt);
6898 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6899 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6901 ctxt->intSubName = name;
6906 * Check for SystemID and ExternalID
6908 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
/* Either identifier being present marks the document as having an external subset. */
6910 if ((URI != NULL) || (ExternalID != NULL)) {
6911 ctxt->hasExternalSubset = 1;
/*
 * NOTE(review): the URI is stored in extSubURI and the ExternalID in
 * extSubSystem; the field name "extSubSystem" does not obviously match
 * its content - confirm against the readers of these fields.
 */
6913 ctxt->extSubURI = URI;
6914 ctxt->extSubSystem = ExternalID;
6919 * Create and update the internal subset.
6921 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6922 (!ctxt->disableSAX))
6923 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6926 * Is there any internal subset declarations ?
6927 * they are handled separately in xmlParseInternalSubset()
6933 * We should be at the end of the DOCTYPE declaration.
6936 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
6942 * xmlParseInternalSubset:
6943 * @ctxt: an XML parser context
6945 * parse the internal subset declaration
6947 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
/*
 * xmlParseInternalSubset: parse the declarations of the internal DTD
 * subset. Switches the parser state to XML_PARSER_DTD, then loops until
 * ']' is the current character, calling xmlParseMarkupDecl() and
 * xmlParsePEReference() on each pass.
 */
6951 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6953 * Is there any DTD definition ?
6956 ctxt->instate = XML_PARSER_DTD;
6959 * Parse the succession of Markup declarations and
6961 * Subsequence (markupdecl | PEReference | S)*
6963 while (RAW != ']') {
/* Snapshot of the input position, compared after the pass to detect lack of progress. */
6964 const xmlChar *check = CUR_PTR;
6965 unsigned int cons = ctxt->input->consumed;
6968 xmlParseMarkupDecl(ctxt);
6969 xmlParsePEReference(ctxt);
6972 * Pop-up of finished entities.
6974 while ((RAW == 0) && (ctxt->inputNr > 1))
/* Nothing was consumed during this pass: report an error. */
6977 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6978 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6979 "xmlParseInternalSubset: error detected in Markup declaration\n");
6990 * We should be at the end of the DOCTYPE declaration.
6993 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
6998 #ifdef LIBXML_SAX1_ENABLED
7000 * xmlParseAttribute:
7001 * @ctxt: an XML parser context
7002 * @value: a xmlChar ** used to store the value of the attribute
7004 * parse an attribute
7006 * [41] Attribute ::= Name Eq AttValue
7008 * [ WFC: No External Entity References ]
7009 * Attribute values cannot contain direct or indirect entity references
7010 * to external entities.
7012 * [ WFC: No < in Attribute Values ]
7013 * The replacement text of any entity referred to directly or indirectly in
7014 * an attribute value (other than "<") must not contain a <.
7016 * [ VC: Attribute Value Type ]
7017 * The attribute must have been declared; the value must be of the type
7020 * [25] Eq ::= S? '=' S?
7024 * [NS 11] Attribute ::= QName Eq AttValue
7026 * Also the case QName == xmlns:??? is handled independently as a namespace
7029 * Returns the attribute name, and the value in *value.
/*
 * xmlParseAttribute: parse one attribute (Name Eq AttValue).
 * The name comes from xmlParseName() and the value from
 * xmlParseAttValue(); the parser state is set to XML_PARSER_CONTENT once
 * the value has been read. The xml:lang value is checked in pedantic
 * mode only, and the xml:space value is checked against "default" and
 * "preserve".
 */
7033 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7034 const xmlChar *name;
7039 name = xmlParseName(ctxt);
7041 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7042 "error parsing attribute name\n");
7053 val = xmlParseAttValue(ctxt);
7054 ctxt->instate = XML_PARSER_CONTENT;
7056 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7057 "Specification mandate value for attribute %s\n", name);
7062 * Check that xml:lang conforms to the specification
7063 * No more registered as an error, just generate a warning now
7064 * since this was deprecated in XML second edition
/* Only checked when ctxt->pedantic is set; a bad value yields a warning. */
7066 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7067 if (!xmlCheckLanguageID(val)) {
7068 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7069 "Malformed value for xml:lang : %s\n",
7075 * Check that xml:space conforms to the specification
/* Any value other than "default" or "preserve" yields a warning. */
7077 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7078 if (xmlStrEqual(val, BAD_CAST "default"))
7080 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7083 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7084 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7095 * @ctxt: an XML parser context
7097 * parse a start of tag either for rule element or
7098 * EmptyElement. In both cases we don't parse the tag closing chars.
7100 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7102 * [ WFC: Unique Att Spec ]
7103 * No attribute name may appear more than once in the same start-tag or
7104 * empty-element tag.
7106 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7108 * [ WFC: Unique Att Spec ]
7109 * No attribute name may appear more than once in the same start-tag or
7110 * empty-element tag.
7114 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7116 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7118 * Returns the element name parsed
/*
 * xmlParseStartTag: parse the start of a tag, without its closing
 * characters. Returns NULL at once when the current character is not
 * '<'.
 * Attributes are collected in the context's atts array as consecutive
 * name/value pairs followed by two NULL entries, then handed to the SAX
 * startElement() callback. The value strings are freed afterwards.
 */
7122 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
7123 const xmlChar *name;
7124 const xmlChar *attname;
7126 const xmlChar **atts = ctxt->atts;
7128 int maxatts = ctxt->maxatts;
7131 if (RAW != '<') return(NULL);
7134 name = xmlParseName(ctxt);
7136 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7137 "xmlParseStartTag: invalid element name\n");
7142 * Now parse the attributes, it ends up with the ending
/* Loop until '>' or "/>" is reached, or the current byte is not a valid char. */
7149 while ((RAW != '>') &&
7150 ((RAW != '/') || (NXT(1) != '>')) &&
7151 (IS_BYTE_CHAR(RAW))) {
7152 const xmlChar *q = CUR_PTR;
7153 unsigned int cons = ctxt->input->consumed;
7155 attname = xmlParseAttribute(ctxt, &attvalue);
7156 if ((attname != NULL) && (attvalue != NULL)) {
7158 * [ WFC: Unique Att Spec ]
7159 * No attribute name may appear more than once in the same
7160 * start-tag or empty-element tag.
/* Names sit at even indexes, so step by two when looking for a duplicate. */
7162 for (i = 0; i < nbatts;i += 2) {
7163 if (xmlStrEqual(atts[i], attname)) {
7164 xmlErrAttributeDup(ctxt, NULL, attname);
7170 * Add the pair to atts
7173 maxatts = 22; /* allow for 10 attrs by default */
7174 atts = (const xmlChar **)
7175 xmlMalloc(maxatts * sizeof(xmlChar *));
7177 xmlErrMemory(ctxt, NULL);
7178 if (attvalue != NULL)
7183 ctxt->maxatts = maxatts;
/* Grow when fewer than four slots remain: two for the pair, two for the NULL terminators. */
7184 } else if (nbatts + 4 > maxatts) {
7188 n = (const xmlChar **) xmlRealloc((void *) atts,
7189 maxatts * sizeof(const xmlChar *));
7191 xmlErrMemory(ctxt, NULL);
7192 if (attvalue != NULL)
7198 ctxt->maxatts = maxatts;
7200 atts[nbatts++] = attname;
7201 atts[nbatts++] = attvalue;
7202 atts[nbatts] = NULL;
7203 atts[nbatts + 1] = NULL;
7205 if (attvalue != NULL)
7212 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7214 if (!IS_BLANK_CH(RAW)) {
7215 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7216 "attributes construct error\n");
/* No input consumed and no attribute produced during this pass: report an internal error. */
7219 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7220 (attname == NULL) && (attvalue == NULL)) {
7221 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7222 "xmlParseStartTag: problem parsing attributes\n");
7230 * SAX: Start of Element !
7232 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7233 (!ctxt->disableSAX)) {
7235 ctxt->sax->startElement(ctxt->userData, name, atts);
7237 ctxt->sax->startElement(ctxt->userData, name, NULL);
7241 /* Free only the content strings */
7242 for (i = 1;i < nbatts;i+=2)
7243 if (atts[i] != NULL)
7244 xmlFree((xmlChar *) atts[i]);
7251 * @ctxt: an XML parser context
7252 * @line: line of the start tag
7253 * @nsNr: number of namespaces on the start tag
7255 * parse an end of tag
7257 * [42] ETag ::= '</' Name S? '>'
7261 * [NS 9] ETag ::= '</' QName S? '>'
7265 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
7266 const xmlChar *name;
7269 if ((RAW != '<') || (NXT(1) != '/')) {
7270 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
7271 "xmlParseEndTag: '</' not found\n");
7276 name = xmlParseNameAndCompare(ctxt,ctxt->name);
7279 * We should definitely be at the ending "S? '>'" part
7283 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
7284 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
7289 * [ WFC: Element Type Match ]
7290 * The Name in an element's end-tag must match the element type in the
7294 if (name != (xmlChar*)1) {
7295 if (name == NULL) name = BAD_CAST "unparseable";
7296 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
7297 "Opening and ending tag mismatch: %s line %d and %s\n",
7298 ctxt->name, line, name);
7304 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7305 (!ctxt->disableSAX))
7306 ctxt->sax->endElement(ctxt->userData, ctxt->name);
7315 * @ctxt: an XML parser context
7317 * parse an end of tag
7319 * [42] ETag ::= '</' Name S? '>'
7323 * [NS 9] ETag ::= '</' QName S? '>'
7327 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
7328 xmlParseEndTag1(ctxt, 0);
7330 #endif /* LIBXML_SAX1_ENABLED */
7332 /************************************************************************
7334 * SAX 2 specific operations *
7336 ************************************************************************/
7338 static const xmlChar *
7339 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7345 * Handler for more complex cases
7349 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
7350 (!IS_LETTER(c) && (c != '_'))) {
7354 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7355 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
7356 (c == '.') || (c == '-') || (c == '_') ||
7357 (IS_COMBINING(c)) ||
7358 (IS_EXTENDER(c)))) {
7359 if (count++ > 100) {
7367 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7372 * @ctxt: an XML parser context
7373 * @prefix: the prefix to lookup
7375 * Lookup the namespace name for the @prefix (which can be NULL)
7376 * The prefix must come from the @ctxt->dict dictionary
7378 * Returns the namespace name or NULL if not bound
7380 static const xmlChar *
7381 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7384 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
7385 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
7386 if (ctxt->nsTab[i] == prefix) {
7387 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7389 return(ctxt->nsTab[i + 1]);
7396 * @ctxt: an XML parser context
7397 * @len: length of the string parsed
7399 * parse an XML name.
7401 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7402 * CombiningChar | Extender
7404 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7406 * Returns the Name parsed or NULL
7409 static const xmlChar *
7410 xmlParseNCName(xmlParserCtxtPtr ctxt) {
7416 * Accelerator for simple ASCII names
7418 in = ctxt->input->cur;
7419 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7420 ((*in >= 0x41) && (*in <= 0x5A)) ||
7423 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7424 ((*in >= 0x41) && (*in <= 0x5A)) ||
7425 ((*in >= 0x30) && (*in <= 0x39)) ||
7426 (*in == '_') || (*in == '-') ||
7429 if ((*in > 0) && (*in < 0x80)) {
7430 count = in - ctxt->input->cur;
7431 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7432 ctxt->input->cur = in;
7433 ctxt->nbChars += count;
7434 ctxt->input->col += count;
7436 xmlErrMemory(ctxt, NULL);
7441 return(xmlParseNCNameComplex(ctxt));
7446 * @ctxt: an XML parser context
7447 * @prefix: pointer to store the prefix part
7449 * parse an XML Namespace QName
7451 * [6] QName ::= (Prefix ':')? LocalPart
7452 * [7] Prefix ::= NCName
7453 * [8] LocalPart ::= NCName
7455 * Returns the Name parsed or NULL
7458 static const xmlChar *
7459 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7460 const xmlChar *l, *p;
7464 l = xmlParseNCName(ctxt);
7467 l = xmlParseName(ctxt);
7469 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7470 "Failed to parse QName '%s'\n", l, NULL, NULL);
7480 l = xmlParseNCName(ctxt);
7484 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7485 "Failed to parse QName '%s:'\n", p, NULL, NULL);
7486 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7487 p = xmlDictLookup(ctxt->dict, tmp, -1);
7488 if (tmp != NULL) xmlFree(tmp);
7495 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7496 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
7498 tmp = (xmlChar *) xmlParseName(ctxt);
7500 tmp = xmlBuildQName(tmp, l, NULL, 0);
7501 l = xmlDictLookup(ctxt->dict, tmp, -1);
7502 if (tmp != NULL) xmlFree(tmp);
7506 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7507 l = xmlDictLookup(ctxt->dict, tmp, -1);
7508 if (tmp != NULL) xmlFree(tmp);
7519 * xmlParseQNameAndCompare:
7520 * @ctxt: an XML parser context
7521 * @name: the localname
7522 * @prefix: the prefix, if any.
7524 * parse an XML name and compares for match
7525 * (specialized for endtag parsing)
7527 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7528 * and the name for mismatch
7531 static const xmlChar *
7532 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7533 xmlChar const *prefix) {
7534 const xmlChar *cmp = name;
7537 const xmlChar *prefix2;
7539 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7542 in = ctxt->input->cur;
7545 while (*in != 0 && *in == *cmp) {
7549 if ((*cmp == 0) && (*in == ':')) {
7552 while (*in != 0 && *in == *cmp) {
7556 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
7558 ctxt->input->cur = in;
7559 return((const xmlChar*) 1);
7563 * all strings come from the dictionary, equality can be done directly
7565 ret = xmlParseQName (ctxt, &prefix2);
7566 if ((ret == name) && (prefix == prefix2))
7567 return((const xmlChar*) 1);
7572 * xmlParseAttValueInternal:
7573 * @ctxt: an XML parser context
7574 * @len: attribute len result
7575 * @alloc: whether the attribute was reallocated as a new string
7576 * @normalize: if 1 then further non-CDATA normalization must be done
7578 * parse a value for an attribute.
7579 * NOTE: if no normalization is needed, the routine will return pointers
7580 * directly from the data buffer.
7582 * 3.3.3 Attribute-Value Normalization:
7583 * Before the value of an attribute is passed to the application or
7584 * checked for validity, the XML processor must normalize it as follows:
7585 * - a character reference is processed by appending the referenced
7586 * character to the attribute value
7587 * - an entity reference is processed by recursively processing the
7588 * replacement text of the entity
7589 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7590 * appending #x20 to the normalized value, except that only a single
7591 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7592 * parsed entity or the literal entity value of an internal parsed entity
7593 * - other characters are processed by appending them to the normalized value
7594 * If the declared value is not CDATA, then the XML processor must further
7595 * process the normalized attribute value by discarding any leading and
7596 * trailing space (#x20) characters, and by replacing sequences of space
7597 * (#x20) characters by a single space (#x20) character.
7598 * All attributes for which no declaration has been read should be treated
7599 * by a non-validating parser as if declared CDATA.
7601 * Returns the AttValue parsed or NULL. The value has to be freed by the
7602 * caller if it was copied, this can be detected by val[*len] == 0.
7606 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7610 const xmlChar *in = NULL, *start, *end, *last;
7611 xmlChar *ret = NULL;
7614 in = (xmlChar *) CUR_PTR;
7615 if (*in != '"' && *in != '\'') {
7616 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
7619 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
7622 * try to handle in this routine the most common case where no
7623 * allocation of a new string is required and where content is
7627 end = ctxt->input->end;
7630 const xmlChar *oldbase = ctxt->input->base;
7632 if (oldbase != ctxt->input->base) {
7633 long delta = ctxt->input->base - oldbase;
7634 start = start + delta;
7637 end = ctxt->input->end;
7641 * Skip any leading spaces
7643 while ((in < end) && (*in != limit) &&
7644 ((*in == 0x20) || (*in == 0x9) ||
7645 (*in == 0xA) || (*in == 0xD))) {
7649 const xmlChar *oldbase = ctxt->input->base;
7651 if (oldbase != ctxt->input->base) {
7652 long delta = ctxt->input->base - oldbase;
7653 start = start + delta;
7656 end = ctxt->input->end;
7659 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7660 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7661 if ((*in++ == 0x20) && (*in == 0x20)) break;
7663 const xmlChar *oldbase = ctxt->input->base;
7665 if (oldbase != ctxt->input->base) {
7666 long delta = ctxt->input->base - oldbase;
7667 start = start + delta;
7670 end = ctxt->input->end;
7675 * skip the trailing blanks
7677 while ((last[-1] == 0x20) && (last > start)) last--;
7678 while ((in < end) && (*in != limit) &&
7679 ((*in == 0x20) || (*in == 0x9) ||
7680 (*in == 0xA) || (*in == 0xD))) {
7683 const xmlChar *oldbase = ctxt->input->base;
7685 if (oldbase != ctxt->input->base) {
7686 long delta = ctxt->input->base - oldbase;
7687 start = start + delta;
7689 last = last + delta;
7691 end = ctxt->input->end;
7694 if (*in != limit) goto need_complex;
7696 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7697 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7700 const xmlChar *oldbase = ctxt->input->base;
7702 if (oldbase != ctxt->input->base) {
7703 long delta = ctxt->input->base - oldbase;
7704 start = start + delta;
7707 end = ctxt->input->end;
7711 if (*in != limit) goto need_complex;
7715 *len = last - start;
7716 ret = (xmlChar *) start;
7718 if (alloc) *alloc = 1;
7719 ret = xmlStrndup(start, last - start);
7722 if (alloc) *alloc = 0;
7725 if (alloc) *alloc = 1;
7726 return xmlParseAttValueComplex(ctxt, len, normalize);
7730 * xmlParseAttribute2:
7731 * @ctxt: an XML parser context
7732 * @pref: the element prefix
7733 * @elem: the element name
7734 * @prefix: a xmlChar ** used to store the value of the attribute prefix
7735 * @value: a xmlChar ** used to store the value of the attribute
7736 * @len: an int * to save the length of the attribute
7737 * @alloc: an int * to indicate if the attribute was allocated
7739 * parse an attribute in the new SAX2 framework.
7741 * Returns the attribute name, and the value in *value.
7744 static const xmlChar *
7745 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7746 const xmlChar *pref, const xmlChar *elem,
7747 const xmlChar **prefix, xmlChar **value,
7748 int *len, int *alloc) {
7749 const xmlChar *name;
7750 xmlChar *val, *internal_val = NULL;
7755 name = xmlParseQName(ctxt, prefix);
7757 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7758 "error parsing attribute name\n");
7763 * get the type if needed
7765 if (ctxt->attsSpecial != NULL) {
7768 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7769 pref, elem, *prefix, name);
7770 if (type != 0) normalize = 1;
7780 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
7781 ctxt->instate = XML_PARSER_CONTENT;
7783 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7784 "Specification mandate value for attribute %s\n", name);
7788 if (*prefix == ctxt->str_xml) {
7790 * Check that xml:lang conforms to the specification
7791 * No more registered as an error, just generate a warning now
7792 * since this was deprecated in XML second edition
7794 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7795 internal_val = xmlStrndup(val, *len);
7796 if (!xmlCheckLanguageID(internal_val)) {
7797 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7798 "Malformed value for xml:lang : %s\n",
7799 internal_val, NULL);
7804 * Check that xml:space conforms to the specification
7806 if (xmlStrEqual(name, BAD_CAST "space")) {
7807 internal_val = xmlStrndup(val, *len);
7808 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7810 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7813 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7814 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7815 internal_val, NULL);
7819 xmlFree(internal_val);
7828 * xmlParseStartTag2:
7829 * @ctxt: an XML parser context
7831 * parse a start of tag either for rule element or
7832 * EmptyElement. In both cases we don't parse the tag closing chars.
7833 * This routine is called when running SAX2 parsing
7835 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7837 * [ WFC: Unique Att Spec ]
7838 * No attribute name may appear more than once in the same start-tag or
7839 * empty-element tag.
7841 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7843 * [ WFC: Unique Att Spec ]
7844 * No attribute name may appear more than once in the same start-tag or
7845 * empty-element tag.
7849 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7851 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7853 * Returns the element name parsed
7856 static const xmlChar *
7857 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7858 const xmlChar **URI, int *tlen) {
7859 const xmlChar *localname;
7860 const xmlChar *prefix;
7861 const xmlChar *attname;
7862 const xmlChar *aprefix;
7863 const xmlChar *nsname;
7865 const xmlChar **atts = ctxt->atts;
7866 int maxatts = ctxt->maxatts;
7867 int nratts, nbatts, nbdef;
7868 int i, j, nbNs, attval, oldline, oldcol;
7869 const xmlChar *base;
7871 int nsNr = ctxt->nsNr;
7873 if (RAW != '<') return(NULL);
7877 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7878 * point since the attribute values may be stored as pointers to
7879 * the buffer and calling SHRINK would destroy them !
7880 * The Shrinking is only possible once the full set of attribute
7881 * callbacks have been done.
7885 base = ctxt->input->base;
7886 cur = ctxt->input->cur - ctxt->input->base;
7887 oldline = ctxt->input->line;
7888 oldcol = ctxt->input->col;
7894 /* Forget any namespaces added during an earlier parse of this element. */
7897 localname = xmlParseQName(ctxt, &prefix);
7898 if (localname == NULL) {
7899 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7900 "StartTag: invalid element name\n");
7903 *tlen = ctxt->input->cur - ctxt->input->base - cur;
7906 * Now parse the attributes, it ends up with the ending
7912 if (ctxt->input->base != base) goto base_changed;
7914 while ((RAW != '>') &&
7915 ((RAW != '/') || (NXT(1) != '>')) &&
7916 (IS_BYTE_CHAR(RAW))) {
7917 const xmlChar *q = CUR_PTR;
7918 unsigned int cons = ctxt->input->consumed;
7919 int len = -1, alloc = 0;
7921 attname = xmlParseAttribute2(ctxt, prefix, localname,
7922 &aprefix, &attvalue, &len, &alloc);
7923 if (ctxt->input->base != base) {
7924 if ((attvalue != NULL) && (alloc != 0))
7929 if ((attname != NULL) && (attvalue != NULL)) {
7930 if (len < 0) len = xmlStrlen(attvalue);
7931 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7932 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7936 uri = xmlParseURI((const char *) URL);
7938 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7939 "xmlns: %s not a valid URI\n",
7942 if (uri->scheme == NULL) {
7943 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7944 "xmlns: URI %s is not absolute\n",
7951 * check that it's not a defined namespace
7953 for (j = 1;j <= nbNs;j++)
7954 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7957 xmlErrAttributeDup(ctxt, NULL, attname);
7959 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
7960 if (alloc != 0) xmlFree(attvalue);
7964 if (aprefix == ctxt->str_xmlns) {
7965 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7968 if (attname == ctxt->str_xml) {
7969 if (URL != ctxt->str_xml_ns) {
7970 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7971 "xml namespace prefix mapped to wrong URI\n",
7975 * Do not keep a namespace definition node
7977 if (alloc != 0) xmlFree(attvalue);
7981 uri = xmlParseURI((const char *) URL);
7983 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7984 "xmlns:%s: '%s' is not a valid URI\n",
7987 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
7988 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7989 "xmlns:%s: URI %s is not absolute\n",
7996 * check that it's not a defined namespace
7998 for (j = 1;j <= nbNs;j++)
7999 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8002 xmlErrAttributeDup(ctxt, aprefix, attname);
8004 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8005 if (alloc != 0) xmlFree(attvalue);
8007 if (ctxt->input->base != base) goto base_changed;
8012 * Add the pair to atts
8014 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8015 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8016 if (attvalue[len] == 0)
8020 maxatts = ctxt->maxatts;
8023 ctxt->attallocs[nratts++] = alloc;
8024 atts[nbatts++] = attname;
8025 atts[nbatts++] = aprefix;
8026 atts[nbatts++] = NULL; /* the URI will be fetched later */
8027 atts[nbatts++] = attvalue;
8029 atts[nbatts++] = attvalue;
8031 * tag if some deallocation is needed
8033 if (alloc != 0) attval = 1;
8035 if ((attvalue != NULL) && (attvalue[len] == 0))
8042 if (ctxt->input->base != base) goto base_changed;
8043 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8045 if (!IS_BLANK_CH(RAW)) {
8046 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8047 "attributes construct error\n");
8051 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8052 (attname == NULL) && (attvalue == NULL)) {
8053 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8054 "xmlParseStartTag: problem parsing attributes\n");
8058 if (ctxt->input->base != base) goto base_changed;
8062 * The attributes defaulting
8064 if (ctxt->attsDefault != NULL) {
8065 xmlDefAttrsPtr defaults;
8067 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8068 if (defaults != NULL) {
8069 for (i = 0;i < defaults->nbAttrs;i++) {
8070 attname = defaults->values[4 * i];
8071 aprefix = defaults->values[4 * i + 1];
8074 * special work for namespaces defaulted defs
8076 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8078 * check that it's not a defined namespace
8080 for (j = 1;j <= nbNs;j++)
8081 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8083 if (j <= nbNs) continue;
8085 nsname = xmlGetNamespace(ctxt, NULL);
8086 if (nsname != defaults->values[4 * i + 2]) {
8087 if (nsPush(ctxt, NULL,
8088 defaults->values[4 * i + 2]) > 0)
8091 } else if (aprefix == ctxt->str_xmlns) {
8093 * check that it's not a defined namespace
8095 for (j = 1;j <= nbNs;j++)
8096 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8098 if (j <= nbNs) continue;
8100 nsname = xmlGetNamespace(ctxt, attname);
8101 if (nsname != defaults->values[2]) {
8102 if (nsPush(ctxt, attname,
8103 defaults->values[4 * i + 2]) > 0)
8108 * check that it's not a defined attribute
8110 for (j = 0;j < nbatts;j+=5) {
8111 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8114 if (j < nbatts) continue;
8116 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8117 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8120 maxatts = ctxt->maxatts;
8123 atts[nbatts++] = attname;
8124 atts[nbatts++] = aprefix;
8125 if (aprefix == NULL)
8126 atts[nbatts++] = NULL;
8128 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8129 atts[nbatts++] = defaults->values[4 * i + 2];
8130 atts[nbatts++] = defaults->values[4 * i + 3];
8138 * The attributes checkings
8140 for (i = 0; i < nbatts;i += 5) {
8142 * The default namespace does not apply to attribute names.
8144 if (atts[i + 1] != NULL) {
8145 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8146 if (nsname == NULL) {
8147 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8148 "Namespace prefix %s for %s on %s is not defined\n",
8149 atts[i + 1], atts[i], localname);
8151 atts[i + 2] = nsname;
8155 * [ WFC: Unique Att Spec ]
8156 * No attribute name may appear more than once in the same
8157 * start-tag or empty-element tag.
8158 * As extended by the Namespace in XML REC.
8160 for (j = 0; j < i;j += 5) {
8161 if (atts[i] == atts[j]) {
8162 if (atts[i+1] == atts[j+1]) {
8163 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8166 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8167 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8168 "Namespaced Attribute %s in '%s' redefined\n",
8169 atts[i], nsname, NULL);
8176 nsname = xmlGetNamespace(ctxt, prefix);
8177 if ((prefix != NULL) && (nsname == NULL)) {
8178 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8179 "Namespace prefix %s on %s is not defined\n",
8180 prefix, localname, NULL);
8186 * SAX: Start of Element !
8188 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8189 (!ctxt->disableSAX)) {
8191 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8192 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8193 nbatts / 5, nbdef, atts);
8195 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8196 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8200 * Free up attribute allocated strings if needed
8203 for (i = 3,j = 0; j < nratts;i += 5,j++)
8204 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8205 xmlFree((xmlChar *) atts[i]);
8212 * the attribute strings are valid iff the base didn't change
8215 for (i = 3,j = 0; j < nratts;i += 5,j++)
8216 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8217 xmlFree((xmlChar *) atts[i]);
8219 ctxt->input->cur = ctxt->input->base + cur;
8220 ctxt->input->line = oldline;
8221 ctxt->input->col = oldcol;
8222 if (ctxt->wellFormed == 1) {
8230 * @ctxt: an XML parser context
8231 * @line: line of the start tag
8232 * @nsNr: number of namespaces on the start tag
8234 * parse an end of tag
8236 * [42] ETag ::= '</' Name S? '>'
8240 * [NS 9] ETag ::= '</' QName S? '>'
8244 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
8245 const xmlChar *URI, int line, int nsNr, int tlen) {
8246 const xmlChar *name;
8249 if ((RAW != '<') || (NXT(1) != '/')) {
8250 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
8255 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
8256 if (ctxt->input->cur[tlen] == '>') {
8257 ctxt->input->cur += tlen + 1;
8260 ctxt->input->cur += tlen;
8264 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8266 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8270 * We should definitely be at the ending "S? '>'" part
8274 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8275 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8280 * [ WFC: Element Type Match ]
8281 * The Name in an element's end-tag must match the element type in the
8285 if (name != (xmlChar*)1) {
8286 if (name == NULL) name = BAD_CAST "unparseable";
8287 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8288 "Opening and ending tag mismatch: %s line %d and %s\n",
8289 ctxt->name, line, name);
8296 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8297 (!ctxt->disableSAX))
8298 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8308 * @ctxt: an XML parser context
8310 * Parse escaped pure raw content.
8312 * [18] CDSect ::= CDStart CData CDEnd
8314 * [19] CDStart ::= '<![CDATA['
8316 * [20] CData ::= (Char* - (Char* ']]>' Char*))
8318 * [21] CDEnd ::= ']]>'
8321 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8322 xmlChar *buf = NULL;
8324 int size = XML_PARSER_BUFFER_SIZE;
8330 /* Check 2.6.0 was NXT(0) not RAW */
8331 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8336 ctxt->instate = XML_PARSER_CDATA_SECTION;
8339 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8340 ctxt->instate = XML_PARSER_CONTENT;
8346 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8347 ctxt->instate = XML_PARSER_CONTENT;
8352 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8354 xmlErrMemory(ctxt, NULL);
8357 while (IS_CHAR(cur) &&
8358 ((r != ']') || (s != ']') || (cur != '>'))) {
8359 if (len + 5 >= size) {
8363 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8366 xmlErrMemory(ctxt, NULL);
8371 COPY_BUF(rl,buf,len,r);
8385 ctxt->instate = XML_PARSER_CONTENT;
8387 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
8388 "CData section not finished\n%.50s\n", buf);
8395 * OK the buffer is to be consumed as cdata.
8397 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8398 if (ctxt->sax->cdataBlock != NULL)
8399 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
8400 else if (ctxt->sax->characters != NULL)
8401 ctxt->sax->characters(ctxt->userData, buf, len);
8408 * @ctxt: an XML parser context
8412 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8416 xmlParseContent(xmlParserCtxtPtr ctxt) {
8418 while ((RAW != 0) &&
8419 ((RAW != '<') || (NXT(1) != '/')) &&
8420 (ctxt->instate != XML_PARSER_EOF)) {
8421 const xmlChar *test = CUR_PTR;
8422 unsigned int cons = ctxt->input->consumed;
8423 const xmlChar *cur = ctxt->input->cur;
8426 * First case : a Processing Instruction.
8428 if ((*cur == '<') && (cur[1] == '?')) {
8433 * Second case : a CDSection
8435 /* 2.6.0 test was *cur not RAW */
8436 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8437 xmlParseCDSect(ctxt);
8441 * Third case : a comment
8443 else if ((*cur == '<') && (NXT(1) == '!') &&
8444 (NXT(2) == '-') && (NXT(3) == '-')) {
8445 xmlParseComment(ctxt);
8446 ctxt->instate = XML_PARSER_CONTENT;
8450 * Fourth case : a sub-element.
8452 else if (*cur == '<') {
8453 xmlParseElement(ctxt);
8457 * Fifth case : a reference. If it has not been resolved,
8458 * parsing returns its Name, create the node
8461 else if (*cur == '&') {
8462 xmlParseReference(ctxt);
8466 * Last case, text. Note that References are handled directly.
8469 xmlParseCharData(ctxt, 0);
8474 * Pop-up of finished entities.
8476 while ((RAW == 0) && (ctxt->inputNr > 1))
8480 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8481 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482 "detected an error in element content\n");
8483 ctxt->instate = XML_PARSER_EOF;
8491 * @ctxt: an XML parser context
8493 * parse an XML element, this is highly recursive
8495 * [39] element ::= EmptyElemTag | STag content ETag
8497 * [ WFC: Element Type Match ]
8498 * The Name in an element's end-tag must match the element type in the
8504 xmlParseElement(xmlParserCtxtPtr ctxt) {
8505 const xmlChar *name;
8506 const xmlChar *prefix;
8508 xmlParserNodeInfo node_info;
8511 int nsNr = ctxt->nsNr;
8513 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8514 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8515 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8517 ctxt->instate = XML_PARSER_EOF;
8521 /* Capture start position */
8522 if (ctxt->record_info) {
8523 node_info.begin_pos = ctxt->input->consumed +
8524 (CUR_PTR - ctxt->input->base);
8525 node_info.begin_line = ctxt->input->line;
8528 if (ctxt->spaceNr == 0)
8529 spacePush(ctxt, -1);
8530 else if (*ctxt->space == -2)
8531 spacePush(ctxt, -1);
8533 spacePush(ctxt, *ctxt->space);
8535 line = ctxt->input->line;
8536 #ifdef LIBXML_SAX1_ENABLED
8538 #endif /* LIBXML_SAX1_ENABLED */
8539 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
8540 #ifdef LIBXML_SAX1_ENABLED
8542 name = xmlParseStartTag(ctxt);
8543 #endif /* LIBXML_SAX1_ENABLED */
8548 namePush(ctxt, name);
8551 #ifdef LIBXML_VALID_ENABLED
8553 * [ VC: Root Element Type ]
8554 * The Name in the document type declaration must match the element
8555 * type of the root element.
8557 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8558 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8559 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8560 #endif /* LIBXML_VALID_ENABLED */
8563 * Check for an Empty Element.
8565 if ((RAW == '/') && (NXT(1) == '>')) {
8568 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8569 (!ctxt->disableSAX))
8570 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8571 #ifdef LIBXML_SAX1_ENABLED
8573 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8574 (!ctxt->disableSAX))
8575 ctxt->sax->endElement(ctxt->userData, name);
8576 #endif /* LIBXML_SAX1_ENABLED */
8580 if (nsNr != ctxt->nsNr)
8581 nsPop(ctxt, ctxt->nsNr - nsNr);
8582 if ( ret != NULL && ctxt->record_info ) {
8583 node_info.end_pos = ctxt->input->consumed +
8584 (CUR_PTR - ctxt->input->base);
8585 node_info.end_line = ctxt->input->line;
8586 node_info.node = ret;
8587 xmlParserAddNodeInfo(ctxt, &node_info);
8594 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8595 "Couldn't find end of Start Tag %s line %d\n",
8599 * end of parsing of this node.
8604 if (nsNr != ctxt->nsNr)
8605 nsPop(ctxt, ctxt->nsNr - nsNr);
8608 * Capture end position and add node
8610 if ( ret != NULL && ctxt->record_info ) {
8611 node_info.end_pos = ctxt->input->consumed +
8612 (CUR_PTR - ctxt->input->base);
8613 node_info.end_line = ctxt->input->line;
8614 node_info.node = ret;
8615 xmlParserAddNodeInfo(ctxt, &node_info);
8621 * Parse the content of the element:
8623 xmlParseContent(ctxt);
8624 if (!IS_BYTE_CHAR(RAW)) {
8625 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
8626 "Premature end of data in tag %s line %d\n",
8630 * end of parsing of this node.
8635 if (nsNr != ctxt->nsNr)
8636 nsPop(ctxt, ctxt->nsNr - nsNr);
8641 * parse the end of tag: '</' should be here.
8644 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
8647 #ifdef LIBXML_SAX1_ENABLED
8649 xmlParseEndTag1(ctxt, line);
8650 #endif /* LIBXML_SAX1_ENABLED */
8653 * Capture end position and add node
8655 if ( ret != NULL && ctxt->record_info ) {
8656 node_info.end_pos = ctxt->input->consumed +
8657 (CUR_PTR - ctxt->input->base);
8658 node_info.end_line = ctxt->input->line;
8659 node_info.node = ret;
8660 xmlParserAddNodeInfo(ctxt, &node_info);
8665 * xmlParseVersionNum:
8666 * @ctxt: an XML parser context
8668 * parse the XML version value.
8670 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8672 * Returns the string giving the XML version number, or NULL
8675 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8676 xmlChar *buf = NULL;
8681 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8683 xmlErrMemory(ctxt, NULL);
8687 while (((cur >= 'a') && (cur <= 'z')) ||
8688 ((cur >= 'A') && (cur <= 'Z')) ||
8689 ((cur >= '0') && (cur <= '9')) ||
8690 (cur == '_') || (cur == '.') ||
8691 (cur == ':') || (cur == '-')) {
8692 if (len + 1 >= size) {
8696 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8698 xmlErrMemory(ctxt, NULL);
8712 * xmlParseVersionInfo:
8713 * @ctxt: an XML parser context
8715 * parse the XML version.
8717 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8719 * [25] Eq ::= S? '=' S?
8721 * Returns the version string, e.g. "1.0"
8725 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8726 xmlChar *version = NULL;
8728 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
8732 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8739 version = xmlParseVersionNum(ctxt);
8741 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8744 } else if (RAW == '\''){
8746 version = xmlParseVersionNum(ctxt);
8748 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8752 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8760 * @ctxt: an XML parser context
8762 * parse the XML encoding name
8764 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8766 * Returns the encoding name value or NULL
/*
 * Reads an EncName (production [81]): an ASCII letter followed by
 * letters, digits and a few punctuation characters, accumulated in a
 * heap buffer that is enlarged on demand.
 *
 * ctxt: the parser context; allocation failures go to xmlErrMemory()
 *       and a malformed name to xmlFatalErr() with XML_ERR_ENCODING_NAME.
 *
 * NOTE(review): the listing omits lines of this function (further local
 * declarations, NULL tests, the last alternative of the loop condition,
 * the loop tail and the returns); only visible statements are described.
 */
8769 xmlParseEncName(xmlParserCtxtPtr ctxt) {
8770 xmlChar *buf = NULL;
/* The first character must be an ASCII letter. */
8776 if (((cur >= 'a') && (cur <= 'z')) ||
8777 ((cur >= 'A') && (cur <= 'Z'))) {
/* The buffer is allocated only once a valid first character is seen. */
8778 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8780 xmlErrMemory(ctxt, NULL);
/* Following characters: letters, digits, dot, underscore (the condition continues on a line not shown). */
8787 while (((cur >= 'a') && (cur <= 'z')) ||
8788 ((cur >= 'A') && (cur <= 'Z')) ||
8789 ((cur >= '0') && (cur <= '9')) ||
8790 (cur == '.') || (cur == '_') ||
/* At most one free slot remains in the buffer: enlarge it. */
8792 if (len + 1 >= size) {
/* The result goes to tmp first, so buf is not overwritten by a failed call. */
8796 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
/*
 * NOTE(review): no release of buf is visible next to this error report;
 * confirm that the failure path frees the old buffer before returning.
 */
8798 xmlErrMemory(ctxt, NULL);
/* Reached when the first character test above fails (the else line is not shown). */
8815 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
8821 * xmlParseEncodingDecl:
8822 * @ctxt: an XML parser context
8824 * parse the XML encoding declaration
8826 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8828 * this sets up the conversion filters.
8830 * Returns the encoding value or NULL
/*
 * Parses an EncodingDecl (production [80]) and records the declared
 * encoding name in the parser context.
 *
 * ctxt: the parser context.
 *
 * What the visible code does with a non-NULL name:
 *  - UTF-16 or UTF16 (compared with xmlStrcasecmp): the name replaces
 *    ctxt->encoding, whose previous value is freed first.
 *  - UTF-8 or UTF8: same treatment.
 *  - anything else: the name replaces ctxt->input->encoding (previous
 *    value freed), then xmlFindCharEncodingHandler() is asked for a
 *    handler; a handler is installed with xmlSwitchToEncoding(),
 *    otherwise XML_ERR_UNSUPPORTED_ENCODING is raised.
 * In every branch the parsed string itself is stored, not a copy, so the
 * context field becomes the holder of that allocation.
 *
 * NOTE(review): blank skipping, character consumption, the guards around
 * the error calls and the return statements are on lines missing from
 * this listing.
 */
8834 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8835 xmlChar *encoding = NULL;
/* Only proceed when the input is positioned on the eight keyword letters. */
8838 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
/* Presumably raised when the equal sign is missing (test not shown). */
8842 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
/* First quoting branch. */
8849 encoding = xmlParseEncName(ctxt);
8851 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* Second quoting branch: value delimited by single quotes. */
8854 } else if (RAW == '\''){
8856 encoding = xmlParseEncName(ctxt);
8858 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
/* Neither quote character was found after the equal sign. */
8862 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8865 * UTF-16 encoding stwich has already taken place at this stage,
8866 * more over the little-endian/big-endian selection is already done
/* UTF-16 family: only remember the name, no handler lookup. */
8868 if ((encoding != NULL) &&
8869 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8870 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
8871 if (ctxt->encoding != NULL)
8872 xmlFree((xmlChar *) ctxt->encoding);
8873 ctxt->encoding = encoding;
8876 * UTF-8 encoding is handled natively
/* UTF-8 family: only remember the name, no handler lookup. */
8878 else if ((encoding != NULL) &&
8879 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8880 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
8881 if (ctxt->encoding != NULL)
8882 xmlFree((xmlChar *) ctxt->encoding);
8883 ctxt->encoding = encoding;
/* Any other name: stored on the current input and resolved to a handler. */
8885 else if (encoding != NULL) {
8886 xmlCharEncodingHandlerPtr handler;
8888 if (ctxt->input->encoding != NULL)
8889 xmlFree((xmlChar *) ctxt->input->encoding);
8890 ctxt->input->encoding = encoding;
8892 handler = xmlFindCharEncodingHandler((const char *) encoding);
8893 if (handler != NULL) {
8894 xmlSwitchToEncoding(ctxt, handler);
/* No handler is known for this name. */
8896 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
8897 "Unsupported encoding %s\n", encoding);
8907 * @ctxt: an XML parser context
8909 * parse the XML standalone declaration
8911 * [32] SDDecl ::= S 'standalone' Eq
8912 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8914 * [ VC: Standalone Document Declaration ]
8915 * TODO The standalone document declaration must have the value "no"
8916 * if any external markup declarations contain declarations of:
8917 * - attributes with default values, if elements to which these
8918 * attributes apply appear in the document without specifications
8919 * of values for these attributes, or
8920 * - entities (other than amp, lt, gt, apos, quot), if references
8921 * to those entities appear in the document, or
8922 * - attributes with values subject to normalization, where the
8923 * attribute appears in the document with a value which will change
8924 * as a result of normalization, or
8925 * - element types with element content, if white space occurs directly
8926 * within any instance of those types.
8928 * Returns 1 if standalone, 0 otherwise
/*
 * Parses a standalone document declaration (production [32]): the
 * keyword, the equal sign and a quoted yes or no value.
 *
 * ctxt: the parser context; errors are raised on it with xmlFatalErr().
 *
 * The local result is initialised to -1. The assignments made for the
 * yes and no values and the return statement are on lines missing from
 * this listing.
 * NOTE(review): the header comment promises 1 or 0 only; the -1 initial
 * value suggests a third outcome when no declaration is present, to be
 * confirmed against the full source.
 *
 * The same value test appears once per quoting style.
 */
8932 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8933 int standalone = -1;
/* Only proceed when the input is positioned on the ten keyword letters. */
8936 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
/* Presumably raised when the equal sign is missing (test not shown). */
8940 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
/* First quoting branch: the value must begin with no or with yes. */
8947 if ((RAW == 'n') && (NXT(1) == 'o')) {
8950 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
/* Value is neither of the two accepted words. */
8955 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
/* Presumably raised when the closing quote is absent (test not shown). */
8958 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8961 } else if (RAW == '"'){
8963 if ((RAW == 'n') && (NXT(1) == 'o')) {
8966 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8971 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
8974 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8978 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8986 * @ctxt: an XML parser context
8988 * parse an XML declaration header
8990 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
/*
 * Parses the XML declaration (production [23]) in order: VersionInfo,
 * optional EncodingDecl, optional SDDecl, closing delimiter.
 *
 * ctxt: the parser context.
 *
 * Effects visible in this listing:
 *  - ctxt->input->standalone is preset to -2 and later overwritten with
 *    the result of xmlParseSDDecl();
 *  - ctxt->version takes ownership of the parsed version string (the
 *    previous value is freed); a missing version raises
 *    XML_ERR_VERSION_MISSING and a version different from
 *    XML_DEFAULT_VERSION only produces a warning;
 *  - the return value of xmlParseEncodingDecl() is not used here; the
 *    function then checks ctxt->errNo for XML_ERR_UNSUPPORTED_ENCODING;
 *  - a declaration that is not closed raises
 *    XML_ERR_XMLDECL_NOT_FINISHED and MOVETO_ENDTAG is applied.
 *
 * NOTE(review): the blank-required check before the standalone part is
 * only performed when ctxt->input->encoding is non-NULL; confirm this is
 * intended, since names of the UTF-8 and UTF-16 families are stored in
 * ctxt->encoding instead by xmlParseEncodingDecl().
 * NOTE(review): several lines (local declarations, skips, returns,
 * closing braces) are missing from this listing.
 */
8994 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8998 * This value for standalone indicates that the document has an
8999 * XML declaration but it does not have a standalone attribute.
9000 * It will be overwritten later if a standalone attribute is found.
9002 ctxt->input->standalone = -2;
9005 * We know that '<?xml' is here.
/* A blank must follow the opening keyword. */
9009 if (!IS_BLANK_CH(RAW)) {
9010 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9011 "Blank needed after '<?xml'\n");
9016 * We must have the VersionInfo here.
9018 version = xmlParseVersionInfo(ctxt);
9019 if (version == NULL) {
9020 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
/* A version other than the default one is tolerated with a warning. */
9022 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9024 * TODO: Blueberry should be detected here
9026 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9027 "Unsupported version '%s'\n",
/* Replace any version already recorded on the context. */
9030 if (ctxt->version != NULL)
9031 xmlFree((void *) ctxt->version);
9032 ctxt->version = version;
9036 * We may have the encoding declaration
/* No blank here: either the declaration ends now or a blank is missing. */
9038 if (!IS_BLANK_CH(RAW)) {
9039 if ((RAW == '?') && (NXT(1) == '>')) {
9043 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
/* Result intentionally unused; the callee records the name on the context. */
9045 xmlParseEncodingDecl(ctxt);
9046 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9048 * The XML REC instructs us to stop parsing right here
9054 * We may have the standalone status.
9056 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
9057 if ((RAW == '?') && (NXT(1) == '>')) {
9061 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9064 ctxt->input->standalone = xmlParseSDDecl(ctxt);
/* Closing delimiter handling. */
9067 if ((RAW == '?') && (NXT(1) == '>')) {
9069 } else if (RAW == '>') {
9070 /* Deprecated old WD syntax: declaration closed by a bare greater-than sign */
9071 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
/* Anything else: report, then resynchronise with MOVETO_ENDTAG. */
9074 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9075 MOVETO_ENDTAG(CUR_PTR);
9082 * @ctxt: an XML parser context
9084 * parse an XML Misc* optional field.
9086 * [27] Misc ::= Comment | PI | S
/*
 * Consumes a run of Misc items (production [27]) at the current position.
 *
 * ctxt: the parser context.
 *
 * The loop continues while the input starts a processing instruction or
 * a comment; a further alternative of the loop condition is on a line
 * missing from this listing. The blank test in the body suggests that
 * it is a blank test, to be confirmed against the full source.
 * Inside the loop the three cases are dispatched in this order:
 * processing instruction, blank, comment (via xmlParseComment()).
 */
9090 xmlParseMisc(xmlParserCtxtPtr ctxt) {
9091 while (((RAW == '<') && (NXT(1) == '?')) ||
9092 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
/* Processing instruction (the call itself is on a line not shown). */
9094 if ((RAW == '<') && (NXT(1) == '?')) {
/* Blank character. */
9096 } else if (IS_BLANK_CH(CUR)) {
/* Remaining case: a comment. */
9099 xmlParseComment(ctxt);
9105 * @ctxt: an XML parser context
9107 * parse an XML document (and build a tree if using the standard SAX
9110 * [1] document ::= prolog element Misc*
9112 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9114 * Returns 0 on success, -1 in case of error. The parser context is augmented
9115 * as a result of the parsing.
/*
 * Pull-mode entry point: parses a complete document (production [1])
 * from the input attached to the context.
 *
 * ctxt: the parser context; the function tests both the context and its
 *       current input for NULL before doing anything else.
 *
 * Sequence visible in this listing:
 *  1. xmlDetectSAX2(), then the SAX setDocumentLocator callback;
 *  2. when at least 4 bytes are available, encoding detection with
 *     xmlDetectCharEncoding() and xmlSwitchEncoding();
 *  3. the XML declaration when the input starts with the five
 *     declaration characters followed by a blank, otherwise
 *     ctxt->version is set to a copy of XML_DEFAULT_VERSION;
 *  4. the SAX startDocument callback (skipped when SAX is disabled);
 *  5. an optional DOCTYPE with its internal subset, then the SAX
 *     externalSubset callback;
 *  6. the root element via xmlParseElement(), with ctxt->instate moved
 *     through PROLOG, CONTENT, EPILOG and finally EOF;
 *  7. the SAX endDocument callback;
 *  8. release of ctxt->myDoc when its version equals SAX_COMPAT_MODE.
 * The final test on ctxt->wellFormed selects the error return.
 *
 * NOTE(review): the encoding test compares the pointer ctxt->encoding
 * with the enum constant XML_CHAR_ENCODING_NONE cast to a pointer;
 * confirm that this is meant as a NULL test.
 * NOTE(review): no NULL test of the xmlCharStrdup() result is visible.
 * NOTE(review): calls to the Misc parser, GROW and SKIP steps, guards
 * and return statements are on lines missing from this listing.
 */
9119 xmlParseDocument(xmlParserCtxtPtr ctxt) {
9121 xmlCharEncoding enc;
9125 if ((ctxt == NULL) || (ctxt->input == NULL))
9131 * SAX: detecting the level.
9133 xmlDetectSAX2(ctxt);
9136 * SAX: beginning of the document processing.
9138 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9139 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
/* Auto-detection needs four readable bytes. */
9141 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9142 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
9144 * Get the 4 first bytes and decode the charset
9145 * if enc != XML_CHAR_ENCODING_NONE
9146 * plug some encoding conversion routines.
9152 enc = xmlDetectCharEncoding(&start[0], 4);
9153 if (enc != XML_CHAR_ENCODING_NONE) {
9154 xmlSwitchEncoding(ctxt, enc);
/* Presumably guarded by an end-of-input test on a line not shown. */
9160 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9164 * Check for the XMLDecl in the Prolog.
9167 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9170 * Note that we will switch encoding on the fly.
9172 xmlParseXMLDecl(ctxt);
9173 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9175 * The XML REC instructs us to stop parsing right here
/* Propagate the standalone status from the input to the context. */
9179 ctxt->standalone = ctxt->input->standalone;
/* No declaration: fall back to the default version string. */
9182 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9184 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9185 ctxt->sax->startDocument(ctxt->userData);
9188 * The Misc part of the Prolog
9194 * Then possibly doc type declaration(s) and more Misc
9195 * (doctypedecl Misc*)?
9198 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
9201 xmlParseDocTypeDecl(ctxt);
9203 ctxt->instate = XML_PARSER_DTD;
9204 xmlParseInternalSubset(ctxt);
9208 * Create and update the external subset.
9211 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9212 (!ctxt->disableSAX))
9213 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9214 ctxt->extSubSystem, ctxt->extSubURI);
9218 ctxt->instate = XML_PARSER_PROLOG;
9223 * Time to start parsing the tree itself
9227 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9228 "Start tag expected, '<' not found\n");
9230 ctxt->instate = XML_PARSER_CONTENT;
9231 xmlParseElement(ctxt);
9232 ctxt->instate = XML_PARSER_EPILOG;
9236 * The Misc part at the end
/* Presumably raised when input remains after the epilog (test not shown). */
9241 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
9243 ctxt->instate = XML_PARSER_EOF;
9247 * SAX: end of the document processing.
9249 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9250 ctxt->sax->endDocument(ctxt->userData);
9253 * Remove locally kept entity definitions if the tree was not built
9255 if ((ctxt->myDoc != NULL) &&
9256 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9257 xmlFreeDoc(ctxt->myDoc);
9261 if (! ctxt->wellFormed) {
9269 * xmlParseExtParsedEnt:
9270 * @ctxt: an XML parser context
9272 * parse a general parsed entity
9273 * An external general parsed entity is well-formed if it matches the
9274 * production labeled extParsedEnt.
9276 * [78] extParsedEnt ::= TextDecl? content
9278 * Returns 0 on success, -1 in case of error. The parser context is augmented
9279 * as a result of the parsing.
/*
 * Parses an external general parsed entity (production [78]): an
 * optional text declaration followed by content.
 *
 * ctxt: the parser context; the function tests both the context and its
 *       current input for NULL before doing anything else.
 *
 * Sequence visible in this listing:
 *  1. xmlDefaultSAXHandlerInit() and xmlDetectSAX2(), then the SAX
 *     setDocumentLocator callback;
 *  2. when at least 4 bytes are available, encoding detection with
 *     xmlDetectCharEncoding() and xmlSwitchEncoding();
 *  3. the declaration, when the input starts with the five declaration
 *     characters followed by a blank, is parsed with xmlParseXMLDecl();
 *     otherwise ctxt->version is set to a copy of XML_DEFAULT_VERSION;
 *  4. the SAX startDocument callback (skipped when SAX is disabled);
 *  5. ctxt->instate is set to XML_PARSER_CONTENT, ctxt->loadsubset is
 *     cleared and xmlParseContent() runs;
 *  6. leftover input is an error: XML_ERR_NOT_WELL_BALANCED when it
 *     begins an end tag, XML_ERR_EXTRA_CONTENT for anything else;
 *  7. the SAX endDocument callback;
 *  8. -1 is returned when the context is flagged as not well formed.
 *
 * NOTE(review): no NULL test of the xmlCharStrdup() result is visible.
 * NOTE(review): several lines (local declarations, guards, other
 * returns) are missing from this listing.
 */
9283 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9285 xmlCharEncoding enc;
9287 if ((ctxt == NULL) || (ctxt->input == NULL))
9290 xmlDefaultSAXHandlerInit();
9292 xmlDetectSAX2(ctxt);
9297 * SAX: beginning of the document processing.
9299 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9300 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9303 * Get the 4 first bytes and decode the charset
9304 * if enc != XML_CHAR_ENCODING_NONE
9305 * plug some encoding conversion routines.
9307 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9312 enc = xmlDetectCharEncoding(start, 4);
9313 if (enc != XML_CHAR_ENCODING_NONE) {
9314 xmlSwitchEncoding(ctxt, enc);
9320 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9324 * Check for the XMLDecl in the Prolog.
9327 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9330 * Note that we will switch encoding on the fly.
9332 xmlParseXMLDecl(ctxt);
9333 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9335 * The XML REC instructs us to stop parsing right here
9341 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9343 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9344 ctxt->sax->startDocument(ctxt->userData);
9347 * Doing validity checking on chunk doesn't make sense
9349 ctxt->instate = XML_PARSER_CONTENT;
9351 ctxt->loadsubset = 0;
9354 xmlParseContent(ctxt);
9356 if ((RAW == '<') && (NXT(1) == '/')) {
9357 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9358 } else if (RAW != 0) {
9359 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
9363 * SAX: end of the document processing.
9365 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9366 ctxt->sax->endDocument(ctxt->userData);
9368 if (! ctxt->wellFormed) return(-1);
9372 #ifdef LIBXML_PUSH_ENABLED
9373 /************************************************************************
9375 * Progressive parsing interfaces *
9377 ************************************************************************/
9380 * xmlParseLookupSequence:
9381 * @ctxt: an XML parser context
9382 * @first: the first char to lookup
9383 * @next: the next char to lookup or zero
9384 * @third: the third char to lookup or zero
9386 * Try to find if a sequence (first, next, third) or just (first next) or
9387 * (first) is available in the input stream.
9388 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9389 * to avoid rescanning sequences of bytes, it DOES change the state of the
9390 * parser, do not use liberally.
9392 * Returns the offset, counted from the current parsing point, at which
9393 * the full sequence starts if it is available, -1 otherwise.
/*
 * Searches the buffered input for a one, two or three byte sequence
 * without consuming anything.
 *
 * ctxt:  the parser context; ctxt->checkIndex is read and written.
 * first: first byte of the sequence.
 * next:  second byte, or zero for a one byte sequence.
 * third: third byte, or zero for a one or two byte sequence.
 *
 * Returns the distance from the current read position (in->cur) to the
 * start of the match; -1 is returned when the input is NULL or the
 * current position lies before the buffer base.
 *
 * The scan starts at the current position, or at ctxt->checkIndex when
 * that is further along, so bytes examined by an earlier unsuccessful
 * call are not examined again. On a match checkIndex is reset to 0; on
 * failure it is set to the position where the scan stopped.
 *
 * NOTE(review): the lines assigning in, the branch taken when in->buf is
 * NULL, the debug guards and the final return are missing from this
 * listing.
 */
9396 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9397 xmlChar next, xmlChar third) {
9399 xmlParserInputPtr in;
9403 if (in == NULL) return(-1);
9404 base = in->cur - in->base;
9405 if (base < 0) return(-1);
/* Resume where the previous unsuccessful scan stopped. */
9406 if (ctxt->checkIndex > base)
9407 base = ctxt->checkIndex;
9408 if (in->buf == NULL) {
/* Buffered input: scan the content of the underlying buffer. */
9412 buf = in->buf->buffer->content;
9413 len = in->buf->buffer->use;
9415 /* shorten the scanned range so that base + 1 and base + 2 stay below the original length */
9416 if (third) len -= 2;
9417 else if (next) len --;
9418 for (;base < len;base++) {
9419 if (buf[base] == first) {
9421 if ((buf[base + 1] != next) ||
9422 (buf[base + 2] != third)) continue;
9423 } else if (next != 0) {
9424 if (buf[base + 1] != next) continue;
/* Full match: the resume marker is no longer needed. */
9426 ctxt->checkIndex = 0;
9429 xmlGenericError(xmlGenericErrorContext,
9430 "PP: lookup '%c' found at %d\n",
9432 else if (third == 0)
9433 xmlGenericError(xmlGenericErrorContext,
9434 "PP: lookup '%c%c' found at %d\n",
9437 xmlGenericError(xmlGenericErrorContext,
9438 "PP: lookup '%c%c%c' found at %d\n",
9439 first, next, third, base);
/* Convert the buffer index into a distance from the read position. */
9441 return(base - (in->cur - in->base));
/* No match: remember how far the scan went for the next call. */
9444 ctxt->checkIndex = base;
9447 xmlGenericError(xmlGenericErrorContext,
9448 "PP: lookup '%c' failed\n", first);
9449 else if (third == 0)
9450 xmlGenericError(xmlGenericErrorContext,
9451 "PP: lookup '%c%c' failed\n", first, next);
9453 xmlGenericError(xmlGenericErrorContext,
9454 "PP: lookup '%c%c%c' failed\n", first, next, third);
9461 * @ctxt: an XML parser context
9462 * @lastlt: pointer to store the last '<' from the input
9463 * @lastgt: pointer to store the last '>' from the input
9465 * Lookup the last < and > in the current chunk
/*
 * Locates the last less-than and greater-than characters of the current
 * input chunk for the push parser.
 *
 * ctxt:   the parser context.
 * lastlt: output location for the position of the last less-than sign.
 * lastgt: output location for the position of the last greater-than sign.
 *
 * A NULL argument is reported with xmlGenericError(). The search only
 * runs when ctxt->progressive is non-zero and exactly one input is on
 * the stack (ctxt->inputNr == 1).
 *
 * Visible search steps: a backward scan from input->end down to
 * input->base for a less-than sign; a forward scan towards a
 * greater-than sign that jumps over single and double quoted sections;
 * and a backward scan for a greater-than sign.
 *
 * NOTE(review): the stores into lastlt and lastgt, the branch
 * structure tying the three scans together and the fallback for the
 * non-progressive case are on lines missing from this listing.
 */
9468 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9469 const xmlChar **lastgt) {
9472 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9473 xmlGenericError(xmlGenericErrorContext,
9474 "Internal error: xmlParseGetLasts\n");
9477 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
9478 tmp = ctxt->input->end;
9480 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9481 if (tmp < ctxt->input->base) {
9487 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9490 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9491 if (tmp < ctxt->input->end) tmp++;
9492 } else if (*tmp == '"') {
9494 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9495 if (tmp < ctxt->input->end) tmp++;
9499 if (tmp < ctxt->input->end)
9504 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9505 if (tmp >= ctxt->input->base)
9517 * xmlCheckCdataPush:
9518 * @utf: pointer to the block of characters
9519 * @len: length of the block in bytes
9521 * Check that the block of characters is okay as CData content [20]
9523 * Returns the number of bytes to pass if okay, a negative index where an
9524 * UTF-8 error occurred otherwise
/*
 * Validates a block of UTF-8 bytes before it is handed out as CDATA
 * content by the push parser.
 *
 * utf: start of the block; a NULL pointer is handled by the first test.
 * len: number of bytes to examine; values of zero or less are handled
 *      by the first test.
 *
 * The loop classifies each lead byte by its high bits and expects one,
 * two, three or four bytes accordingly. For multi-byte forms every
 * continuation byte must match the bit pattern 10xxxxxx and the decoded
 * code point must satisfy xmlIsCharQ(). For the one byte form the
 * values 0x9, 0xA and 0xD are explicitly accepted.
 *
 * When a multi-byte sequence would extend past len, the function
 * returns ix, the number of bytes validated so far, so the caller can
 * resubmit the incomplete tail once more data is available.
 *
 * NOTE(review): the local declarations, the statements advancing ix,
 * the error returns and the final return are on lines missing from
 * this listing.
 */
9527 xmlCheckCdataPush(const xmlChar *utf, int len) {
9532 if ((utf == NULL) || (len <= 0))
9535 for (ix = 0; ix < len;) { /* bounded by len; ix is advanced inside the body */
9537 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 0 */
9540 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9544 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9545 if (ix + 2 > len) return(ix);
9546 if ((utf[ix+1] & 0xc0 ) != 0x80)
/* 5 payload bits from the lead byte, 6 from the continuation byte. */
9548 codepoint = (utf[ix] & 0x1f) << 6;
9549 codepoint |= utf[ix+1] & 0x3f;
9550 if (!xmlIsCharQ(codepoint))
9553 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9554 if (ix + 3 > len) return(ix);
9555 if (((utf[ix+1] & 0xc0) != 0x80) ||
9556 ((utf[ix+2] & 0xc0) != 0x80))
/* 4 payload bits from the lead byte, 6 from each continuation byte. */
9558 codepoint = (utf[ix] & 0xf) << 12;
9559 codepoint |= (utf[ix+1] & 0x3f) << 6;
9560 codepoint |= utf[ix+2] & 0x3f;
9561 if (!xmlIsCharQ(codepoint))
9564 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9565 if (ix + 4 > len) return(ix);
9566 if (((utf[ix+1] & 0xc0) != 0x80) ||
9567 ((utf[ix+2] & 0xc0) != 0x80) ||
9568 ((utf[ix+3] & 0xc0) != 0x80))
/* 3 payload bits from the lead byte, 6 from each continuation byte. */
9570 codepoint = (utf[ix] & 0x7) << 18;
9571 codepoint |= (utf[ix+1] & 0x3f) << 12;
9572 codepoint |= (utf[ix+2] & 0x3f) << 6;
9573 codepoint |= utf[ix+3] & 0x3f;
9574 if (!xmlIsCharQ(codepoint))
9577 } else /* lead byte matches none of the four UTF-8 patterns */
9584 * xmlParseTryOrFinish:
9585 * @ctxt: an XML parser context
9586 * @terminate: last chunk indicator
9588 * Try to progress on parsing
9590 * Returns zero if no parsing was possible
9593 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9597 const xmlChar *lastlt, *lastgt;
9599 if (ctxt->input == NULL)
9603 switch (ctxt->instate) {
9604 case XML_PARSER_EOF:
9605 xmlGenericError(xmlGenericErrorContext,
9606 "PP: try EOF\n"); break;
9607 case XML_PARSER_START:
9608 xmlGenericError(xmlGenericErrorContext,
9609 "PP: try START\n"); break;
9610 case XML_PARSER_MISC:
9611 xmlGenericError(xmlGenericErrorContext,
9612 "PP: try MISC\n");break;
9613 case XML_PARSER_COMMENT:
9614 xmlGenericError(xmlGenericErrorContext,
9615 "PP: try COMMENT\n");break;
9616 case XML_PARSER_PROLOG:
9617 xmlGenericError(xmlGenericErrorContext,
9618 "PP: try PROLOG\n");break;
9619 case XML_PARSER_START_TAG:
9620 xmlGenericError(xmlGenericErrorContext,
9621 "PP: try START_TAG\n");break;
9622 case XML_PARSER_CONTENT:
9623 xmlGenericError(xmlGenericErrorContext,
9624 "PP: try CONTENT\n");break;
9625 case XML_PARSER_CDATA_SECTION:
9626 xmlGenericError(xmlGenericErrorContext,
9627 "PP: try CDATA_SECTION\n");break;
9628 case XML_PARSER_END_TAG:
9629 xmlGenericError(xmlGenericErrorContext,
9630 "PP: try END_TAG\n");break;
9631 case XML_PARSER_ENTITY_DECL:
9632 xmlGenericError(xmlGenericErrorContext,
9633 "PP: try ENTITY_DECL\n");break;
9634 case XML_PARSER_ENTITY_VALUE:
9635 xmlGenericError(xmlGenericErrorContext,
9636 "PP: try ENTITY_VALUE\n");break;
9637 case XML_PARSER_ATTRIBUTE_VALUE:
9638 xmlGenericError(xmlGenericErrorContext,
9639 "PP: try ATTRIBUTE_VALUE\n");break;
9640 case XML_PARSER_DTD:
9641 xmlGenericError(xmlGenericErrorContext,
9642 "PP: try DTD\n");break;
9643 case XML_PARSER_EPILOG:
9644 xmlGenericError(xmlGenericErrorContext,
9645 "PP: try EPILOG\n");break;
9647 xmlGenericError(xmlGenericErrorContext,
9648 "PP: try PI\n");break;
9649 case XML_PARSER_IGNORE:
9650 xmlGenericError(xmlGenericErrorContext,
9651 "PP: try IGNORE\n");break;
9655 if ((ctxt->input != NULL) &&
9656 (ctxt->input->cur - ctxt->input->base > 4096)) {
9658 ctxt->checkIndex = 0;
9660 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
9663 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9668 * Pop-up of finished entities.
9670 while ((RAW == 0) && (ctxt->inputNr > 1))
9673 if (ctxt->input == NULL) break;
9674 if (ctxt->input->buf == NULL)
9675 avail = ctxt->input->length -
9676 (ctxt->input->cur - ctxt->input->base);
9679 * If we are operating on converted input, try to flush
9680 * remaining chars to avoid them stalling in the non-converted
9683 if ((ctxt->input->buf->raw != NULL) &&
9684 (ctxt->input->buf->raw->use > 0)) {
9685 int base = ctxt->input->base -
9686 ctxt->input->buf->buffer->content;
9687 int current = ctxt->input->cur - ctxt->input->base;
9689 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9690 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9691 ctxt->input->cur = ctxt->input->base + current;
9693 &ctxt->input->buf->buffer->content[
9694 ctxt->input->buf->buffer->use];
9696 avail = ctxt->input->buf->buffer->use -
9697 (ctxt->input->cur - ctxt->input->base);
9701 switch (ctxt->instate) {
9702 case XML_PARSER_EOF:
9704 * Document parsing is done !
9707 case XML_PARSER_START:
9708 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9710 xmlCharEncoding enc;
9713 * Very first chars read from the document flow.
9719 * Get the 4 first bytes and decode the charset
9720 * if enc != XML_CHAR_ENCODING_NONE
9721 * plug some encoding conversion routines,
9722 * else xmlSwitchEncoding will set to (default)
9729 enc = xmlDetectCharEncoding(start, 4);
9730 xmlSwitchEncoding(ctxt, enc);
9736 cur = ctxt->input->cur[0];
9737 next = ctxt->input->cur[1];
9739 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9740 ctxt->sax->setDocumentLocator(ctxt->userData,
9741 &xmlDefaultSAXLocator);
9742 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9743 ctxt->instate = XML_PARSER_EOF;
9745 xmlGenericError(xmlGenericErrorContext,
9746 "PP: entering EOF\n");
9748 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9749 ctxt->sax->endDocument(ctxt->userData);
9752 if ((cur == '<') && (next == '?')) {
9753 /* PI or XML decl */
9754 if (avail < 5) return(ret);
9756 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9758 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9759 ctxt->sax->setDocumentLocator(ctxt->userData,
9760 &xmlDefaultSAXLocator);
9761 if ((ctxt->input->cur[2] == 'x') &&
9762 (ctxt->input->cur[3] == 'm') &&
9763 (ctxt->input->cur[4] == 'l') &&
9764 (IS_BLANK_CH(ctxt->input->cur[5]))) {
9767 xmlGenericError(xmlGenericErrorContext,
9768 "PP: Parsing XML Decl\n");
9770 xmlParseXMLDecl(ctxt);
9771 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9773 * The XML REC instructs us to stop parsing right
9776 ctxt->instate = XML_PARSER_EOF;
9779 ctxt->standalone = ctxt->input->standalone;
9780 if ((ctxt->encoding == NULL) &&
9781 (ctxt->input->encoding != NULL))
9782 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9783 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9784 (!ctxt->disableSAX))
9785 ctxt->sax->startDocument(ctxt->userData);
9786 ctxt->instate = XML_PARSER_MISC;
9788 xmlGenericError(xmlGenericErrorContext,
9789 "PP: entering MISC\n");
9792 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9793 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9794 (!ctxt->disableSAX))
9795 ctxt->sax->startDocument(ctxt->userData);
9796 ctxt->instate = XML_PARSER_MISC;
9798 xmlGenericError(xmlGenericErrorContext,
9799 "PP: entering MISC\n");
9803 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9804 ctxt->sax->setDocumentLocator(ctxt->userData,
9805 &xmlDefaultSAXLocator);
9806 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9807 if (ctxt->version == NULL) {
9808 xmlErrMemory(ctxt, NULL);
9811 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9812 (!ctxt->disableSAX))
9813 ctxt->sax->startDocument(ctxt->userData);
9814 ctxt->instate = XML_PARSER_MISC;
9816 xmlGenericError(xmlGenericErrorContext,
9817 "PP: entering MISC\n");
9821 case XML_PARSER_START_TAG: {
9822 const xmlChar *name;
9823 const xmlChar *prefix;
9825 int nsNr = ctxt->nsNr;
9827 if ((avail < 2) && (ctxt->inputNr == 1))
9829 cur = ctxt->input->cur[0];
9831 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9832 ctxt->instate = XML_PARSER_EOF;
9833 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9834 ctxt->sax->endDocument(ctxt->userData);
9838 if (ctxt->progressive) {
9839 /* > can be found unescaped in attribute values */
9840 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
9842 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9846 if (ctxt->spaceNr == 0)
9847 spacePush(ctxt, -1);
9848 else if (*ctxt->space == -2)
9849 spacePush(ctxt, -1);
9851 spacePush(ctxt, *ctxt->space);
9852 #ifdef LIBXML_SAX1_ENABLED
9854 #endif /* LIBXML_SAX1_ENABLED */
9855 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9856 #ifdef LIBXML_SAX1_ENABLED
9858 name = xmlParseStartTag(ctxt);
9859 #endif /* LIBXML_SAX1_ENABLED */
9862 ctxt->instate = XML_PARSER_EOF;
9863 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9864 ctxt->sax->endDocument(ctxt->userData);
9867 #ifdef LIBXML_VALID_ENABLED
9869 * [ VC: Root Element Type ]
9870 * The Name in the document type declaration must match
9871 * the element type of the root element.
9873 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9874 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9875 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9876 #endif /* LIBXML_VALID_ENABLED */
9879 * Check for an Empty Element.
9881 if ((RAW == '/') && (NXT(1) == '>')) {
9885 if ((ctxt->sax != NULL) &&
9886 (ctxt->sax->endElementNs != NULL) &&
9887 (!ctxt->disableSAX))
9888 ctxt->sax->endElementNs(ctxt->userData, name,
9890 if (ctxt->nsNr - nsNr > 0)
9891 nsPop(ctxt, ctxt->nsNr - nsNr);
9892 #ifdef LIBXML_SAX1_ENABLED
9894 if ((ctxt->sax != NULL) &&
9895 (ctxt->sax->endElement != NULL) &&
9896 (!ctxt->disableSAX))
9897 ctxt->sax->endElement(ctxt->userData, name);
9898 #endif /* LIBXML_SAX1_ENABLED */
9901 if (ctxt->nameNr == 0) {
9902 ctxt->instate = XML_PARSER_EPILOG;
9904 ctxt->instate = XML_PARSER_CONTENT;
9911 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
9912 "Couldn't find end of Start Tag %s\n",
9918 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
9919 #ifdef LIBXML_SAX1_ENABLED
9921 namePush(ctxt, name);
9922 #endif /* LIBXML_SAX1_ENABLED */
9924 ctxt->instate = XML_PARSER_CONTENT;
9927 case XML_PARSER_CONTENT: {
9928 const xmlChar *test;
9930 if ((avail < 2) && (ctxt->inputNr == 1))
9932 cur = ctxt->input->cur[0];
9933 next = ctxt->input->cur[1];
9936 cons = ctxt->input->consumed;
9937 if ((cur == '<') && (next == '/')) {
9938 ctxt->instate = XML_PARSER_END_TAG;
9940 } else if ((cur == '<') && (next == '?')) {
9942 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9945 } else if ((cur == '<') && (next != '!')) {
9946 ctxt->instate = XML_PARSER_START_TAG;
9948 } else if ((cur == '<') && (next == '!') &&
9949 (ctxt->input->cur[2] == '-') &&
9950 (ctxt->input->cur[3] == '-')) {
9955 ctxt->input->cur += 4;
9956 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9957 ctxt->input->cur -= 4;
9958 if ((!terminate) && (term < 0))
9960 xmlParseComment(ctxt);
9961 ctxt->instate = XML_PARSER_CONTENT;
9962 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9963 (ctxt->input->cur[2] == '[') &&
9964 (ctxt->input->cur[3] == 'C') &&
9965 (ctxt->input->cur[4] == 'D') &&
9966 (ctxt->input->cur[5] == 'A') &&
9967 (ctxt->input->cur[6] == 'T') &&
9968 (ctxt->input->cur[7] == 'A') &&
9969 (ctxt->input->cur[8] == '[')) {
9971 ctxt->instate = XML_PARSER_CDATA_SECTION;
9973 } else if ((cur == '<') && (next == '!') &&
9976 } else if (cur == '&') {
9978 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9980 xmlParseReference(ctxt);
9982 /* TODO Avoid the extra copy, handle directly !!! */
9984 * Goal of the following test is:
9985 * - minimize calls to the SAX 'character' callback
9986 * when they are mergeable
9987 * - handle an problem for isBlank when we only parse
9988 * a sequence of blank chars and the next one is
9989 * not available to check against '<' presence.
9990 * - tries to homogenize the differences in SAX
9991 * callbacks between the push and pull versions
9994 if ((ctxt->inputNr == 1) &&
9995 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9997 if (ctxt->progressive) {
9998 if ((lastlt == NULL) ||
9999 (ctxt->input->cur > lastlt))
10001 } else if (xmlParseLookupSequence(ctxt,
10007 ctxt->checkIndex = 0;
10008 xmlParseCharData(ctxt, 0);
10011 * Pop-up of finished entities.
10013 while ((RAW == 0) && (ctxt->inputNr > 1))
10015 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10016 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10017 "detected an error in element content\n");
10018 ctxt->instate = XML_PARSER_EOF;
10023 case XML_PARSER_END_TAG:
10027 if (ctxt->progressive) {
10028 /* > can be found unescaped in attribute values */
10029 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10031 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10036 xmlParseEndTag2(ctxt,
10037 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10038 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
10039 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
10042 #ifdef LIBXML_SAX1_ENABLED
10044 xmlParseEndTag1(ctxt, 0);
10045 #endif /* LIBXML_SAX1_ENABLED */
10046 if (ctxt->nameNr == 0) {
10047 ctxt->instate = XML_PARSER_EPILOG;
10049 ctxt->instate = XML_PARSER_CONTENT;
10052 case XML_PARSER_CDATA_SECTION: {
10054 * The Push mode need to have the SAX callback for
10055 * cdataBlock merge back contiguous callbacks.
10059 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10061 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
10064 tmp = xmlCheckCdataPush(ctxt->input->cur,
10065 XML_PARSER_BIG_BUFFER_SIZE);
10068 ctxt->input->cur += tmp;
10069 goto encoding_error;
10071 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10072 if (ctxt->sax->cdataBlock != NULL)
10073 ctxt->sax->cdataBlock(ctxt->userData,
10074 ctxt->input->cur, tmp);
10075 else if (ctxt->sax->characters != NULL)
10076 ctxt->sax->characters(ctxt->userData,
10077 ctxt->input->cur, tmp);
10080 ctxt->checkIndex = 0;
10086 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10087 if ((tmp < 0) || (tmp != base)) {
10089 ctxt->input->cur += tmp;
10090 goto encoding_error;
10092 if ((ctxt->sax != NULL) && (base > 0) &&
10093 (!ctxt->disableSAX)) {
10094 if (ctxt->sax->cdataBlock != NULL)
10095 ctxt->sax->cdataBlock(ctxt->userData,
10096 ctxt->input->cur, base);
10097 else if (ctxt->sax->characters != NULL)
10098 ctxt->sax->characters(ctxt->userData,
10099 ctxt->input->cur, base);
10102 ctxt->checkIndex = 0;
10103 ctxt->instate = XML_PARSER_CONTENT;
10105 xmlGenericError(xmlGenericErrorContext,
10106 "PP: entering CONTENT\n");
10111 case XML_PARSER_MISC:
10113 if (ctxt->input->buf == NULL)
10114 avail = ctxt->input->length -
10115 (ctxt->input->cur - ctxt->input->base);
10117 avail = ctxt->input->buf->buffer->use -
10118 (ctxt->input->cur - ctxt->input->base);
10121 cur = ctxt->input->cur[0];
10122 next = ctxt->input->cur[1];
10123 if ((cur == '<') && (next == '?')) {
10124 if ((!terminate) &&
10125 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10128 xmlGenericError(xmlGenericErrorContext,
10129 "PP: Parsing PI\n");
10132 } else if ((cur == '<') && (next == '!') &&
10133 (ctxt->input->cur[2] == '-') &&
10134 (ctxt->input->cur[3] == '-')) {
10135 if ((!terminate) &&
10136 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10139 xmlGenericError(xmlGenericErrorContext,
10140 "PP: Parsing Comment\n");
10142 xmlParseComment(ctxt);
10143 ctxt->instate = XML_PARSER_MISC;
10144 } else if ((cur == '<') && (next == '!') &&
10145 (ctxt->input->cur[2] == 'D') &&
10146 (ctxt->input->cur[3] == 'O') &&
10147 (ctxt->input->cur[4] == 'C') &&
10148 (ctxt->input->cur[5] == 'T') &&
10149 (ctxt->input->cur[6] == 'Y') &&
10150 (ctxt->input->cur[7] == 'P') &&
10151 (ctxt->input->cur[8] == 'E')) {
10152 if ((!terminate) &&
10153 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10156 xmlGenericError(xmlGenericErrorContext,
10157 "PP: Parsing internal subset\n");
10159 ctxt->inSubset = 1;
10160 xmlParseDocTypeDecl(ctxt);
10162 ctxt->instate = XML_PARSER_DTD;
10164 xmlGenericError(xmlGenericErrorContext,
10165 "PP: entering DTD\n");
10169 * Create and update the external subset.
10171 ctxt->inSubset = 2;
10172 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10173 (ctxt->sax->externalSubset != NULL))
10174 ctxt->sax->externalSubset(ctxt->userData,
10175 ctxt->intSubName, ctxt->extSubSystem,
10177 ctxt->inSubset = 0;
10178 ctxt->instate = XML_PARSER_PROLOG;
10180 xmlGenericError(xmlGenericErrorContext,
10181 "PP: entering PROLOG\n");
10184 } else if ((cur == '<') && (next == '!') &&
10188 ctxt->instate = XML_PARSER_START_TAG;
10189 ctxt->progressive = 1;
10190 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10192 xmlGenericError(xmlGenericErrorContext,
10193 "PP: entering START_TAG\n");
10197 case XML_PARSER_PROLOG:
10199 if (ctxt->input->buf == NULL)
10200 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10202 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10205 cur = ctxt->input->cur[0];
10206 next = ctxt->input->cur[1];
10207 if ((cur == '<') && (next == '?')) {
10208 if ((!terminate) &&
10209 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10212 xmlGenericError(xmlGenericErrorContext,
10213 "PP: Parsing PI\n");
10216 } else if ((cur == '<') && (next == '!') &&
10217 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10218 if ((!terminate) &&
10219 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10222 xmlGenericError(xmlGenericErrorContext,
10223 "PP: Parsing Comment\n");
10225 xmlParseComment(ctxt);
10226 ctxt->instate = XML_PARSER_PROLOG;
10227 } else if ((cur == '<') && (next == '!') &&
10231 ctxt->instate = XML_PARSER_START_TAG;
10232 if (ctxt->progressive == 0)
10233 ctxt->progressive = 1;
10234 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10236 xmlGenericError(xmlGenericErrorContext,
10237 "PP: entering START_TAG\n");
10241 case XML_PARSER_EPILOG:
10243 if (ctxt->input->buf == NULL)
10244 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10246 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10249 cur = ctxt->input->cur[0];
10250 next = ctxt->input->cur[1];
10251 if ((cur == '<') && (next == '?')) {
10252 if ((!terminate) &&
10253 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10256 xmlGenericError(xmlGenericErrorContext,
10257 "PP: Parsing PI\n");
10260 ctxt->instate = XML_PARSER_EPILOG;
10261 } else if ((cur == '<') && (next == '!') &&
10262 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10263 if ((!terminate) &&
10264 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10267 xmlGenericError(xmlGenericErrorContext,
10268 "PP: Parsing Comment\n");
10270 xmlParseComment(ctxt);
10271 ctxt->instate = XML_PARSER_EPILOG;
10272 } else if ((cur == '<') && (next == '!') &&
10276 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10277 ctxt->instate = XML_PARSER_EOF;
10279 xmlGenericError(xmlGenericErrorContext,
10280 "PP: entering EOF\n");
10282 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10283 ctxt->sax->endDocument(ctxt->userData);
10287 case XML_PARSER_DTD: {
10289 * Sorry but progressive parsing of the internal subset
10290 * is not expected to be supported. We first check that
10291 * the full content of the internal subset is available and
10292 * the parsing is launched only at that point.
10293 * Internal subset ends up with "']' S? '>'" in an unescaped
10294 * section and not in a ']]>' sequence which are conditional
10295 * sections (whoever argued to keep that crap in XML deserve
10296 * a place in hell !).
10302 base = ctxt->input->cur - ctxt->input->base;
10303 if (base < 0) return(0);
10304 if (ctxt->checkIndex > base)
10305 base = ctxt->checkIndex;
10306 buf = ctxt->input->buf->buffer->content;
10307 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10310 if (buf[base] == quote)
10314 if ((quote == 0) && (buf[base] == '<')) {
10316 /* special handling of comments */
10317 if (((unsigned int) base + 4 <
10318 ctxt->input->buf->buffer->use) &&
10319 (buf[base + 1] == '!') &&
10320 (buf[base + 2] == '-') &&
10321 (buf[base + 3] == '-')) {
10322 for (;(unsigned int) base + 3 <
10323 ctxt->input->buf->buffer->use; base++) {
10324 if ((buf[base] == '-') &&
10325 (buf[base + 1] == '-') &&
10326 (buf[base + 2] == '>')) {
10334 fprintf(stderr, "unfinished comment\n");
10341 if (buf[base] == '"') {
10345 if (buf[base] == '\'') {
10349 if (buf[base] == ']') {
10351 fprintf(stderr, "%c%c%c%c: ", buf[base],
10352 buf[base + 1], buf[base + 2], buf[base + 3]);
10354 if ((unsigned int) base +1 >=
10355 ctxt->input->buf->buffer->use)
10357 if (buf[base + 1] == ']') {
10358 /* conditional crap, skip both ']' ! */
10363 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10365 if (buf[base + i] == '>') {
10367 fprintf(stderr, "found\n");
10369 goto found_end_int_subset;
10371 if (!IS_BLANK_CH(buf[base + i])) {
10373 fprintf(stderr, "not found\n");
10375 goto not_end_of_int_subset;
10379 fprintf(stderr, "end of stream\n");
10384 not_end_of_int_subset:
10385 continue; /* for */
10388 * We didn't found the end of the Internal subset
10392 xmlGenericError(xmlGenericErrorContext,
10393 "PP: lookup of int subset end filed\n");
10397 found_end_int_subset:
10398 xmlParseInternalSubset(ctxt);
10399 ctxt->inSubset = 2;
10400 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10401 (ctxt->sax->externalSubset != NULL))
10402 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10403 ctxt->extSubSystem, ctxt->extSubURI);
10404 ctxt->inSubset = 0;
10405 ctxt->instate = XML_PARSER_PROLOG;
10406 ctxt->checkIndex = 0;
10408 xmlGenericError(xmlGenericErrorContext,
10409 "PP: entering PROLOG\n");
10413 case XML_PARSER_COMMENT:
10414 xmlGenericError(xmlGenericErrorContext,
10415 "PP: internal error, state == COMMENT\n");
10416 ctxt->instate = XML_PARSER_CONTENT;
10418 xmlGenericError(xmlGenericErrorContext,
10419 "PP: entering CONTENT\n");
10422 case XML_PARSER_IGNORE:
10423 xmlGenericError(xmlGenericErrorContext,
10424 "PP: internal error, state == IGNORE");
10425 ctxt->instate = XML_PARSER_DTD;
10427 xmlGenericError(xmlGenericErrorContext,
10428 "PP: entering DTD\n");
10431 case XML_PARSER_PI:
10432 xmlGenericError(xmlGenericErrorContext,
10433 "PP: internal error, state == PI\n");
10434 ctxt->instate = XML_PARSER_CONTENT;
10436 xmlGenericError(xmlGenericErrorContext,
10437 "PP: entering CONTENT\n");
10440 case XML_PARSER_ENTITY_DECL:
10441 xmlGenericError(xmlGenericErrorContext,
10442 "PP: internal error, state == ENTITY_DECL\n");
10443 ctxt->instate = XML_PARSER_DTD;
10445 xmlGenericError(xmlGenericErrorContext,
10446 "PP: entering DTD\n");
10449 case XML_PARSER_ENTITY_VALUE:
10450 xmlGenericError(xmlGenericErrorContext,
10451 "PP: internal error, state == ENTITY_VALUE\n");
10452 ctxt->instate = XML_PARSER_CONTENT;
10454 xmlGenericError(xmlGenericErrorContext,
10455 "PP: entering DTD\n");
10458 case XML_PARSER_ATTRIBUTE_VALUE:
10459 xmlGenericError(xmlGenericErrorContext,
10460 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10461 ctxt->instate = XML_PARSER_START_TAG;
10463 xmlGenericError(xmlGenericErrorContext,
10464 "PP: entering START_TAG\n");
10467 case XML_PARSER_SYSTEM_LITERAL:
10468 xmlGenericError(xmlGenericErrorContext,
10469 "PP: internal error, state == SYSTEM_LITERAL\n");
10470 ctxt->instate = XML_PARSER_START_TAG;
10472 xmlGenericError(xmlGenericErrorContext,
10473 "PP: entering START_TAG\n");
10476 case XML_PARSER_PUBLIC_LITERAL:
10477 xmlGenericError(xmlGenericErrorContext,
10478 "PP: internal error, state == PUBLIC_LITERAL\n");
10479 ctxt->instate = XML_PARSER_START_TAG;
10481 xmlGenericError(xmlGenericErrorContext,
10482 "PP: entering START_TAG\n");
10489 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10496 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10497 ctxt->input->cur[0], ctxt->input->cur[1],
10498 ctxt->input->cur[2], ctxt->input->cur[3]);
10499 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10500 "Input is not proper UTF-8, indicate encoding !\n%s",
10501 BAD_CAST buffer, NULL);
10508 * @ctxt: an XML parser context
10509 * @chunk: a char array
10510 * @size: the size in bytes of the chunk
10511 * @terminate: last chunk indicator
10513 * Parse a Chunk of memory
10515 * Returns zero if no error, the xmlParserErrors otherwise.
/*
 * xmlParseChunk: push-mode entry point. Hands @chunk (@size bytes) to
 * xmlParserInputBufferPush() on the current input buffer of @ctxt, runs
 * xmlParseTryOrFinish() and returns ctxt->errNo.
 *
 * Early exits: XML_ERR_INTERNAL_ERROR (NOTE(review): the guarding test is
 * presumably a NULL check on @ctxt - confirm), the stored ctxt->errNo when
 * an earlier error already disabled SAX, XML_PARSER_EOF when the push
 * fails, XML_ERR_INVALID_ENCODING when the encoder reports an error.
 */
10518 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10523 return(XML_ERR_INTERNAL_ERROR);
/* An earlier error that disabled SAX is sticky: report it again. */
10524 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10525 return(ctxt->errNo);
10526 if (ctxt->instate == XML_PARSER_START)
10527 xmlDetectSAX2(ctxt);
/*
 * The chunk ends with a carriage return and more input is expected.
 * NOTE(review): presumably the CR is held back here (see the end_in_lf
 * test after parsing, which pushes a lone CR) so that a CR LF pair split
 * across two chunks is still handled as one line end - confirm.
 */
10528 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10529 (chunk[size - 1] == '\r')) {
10533 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10534 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
/* Save base and cur as offsets so the pointers can be rebuilt after the push. */
10535 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10536 int cur = ctxt->input->cur - ctxt->input->base;
10539 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
/* Push failure: record XML_PARSER_EOF as the error, disable SAX and return it. */
10541 ctxt->errNo = XML_PARSER_EOF;
10542 ctxt->disableSAX = 1;
10543 return (XML_PARSER_EOF);
/* Rebuild the input pointers from the saved offsets against the current buffer content. */
10545 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10546 ctxt->input->cur = ctxt->input->base + cur;
10548 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10550 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/*
 * No new data was pushed: when an encoder and both buffers exist, call
 * xmlCharEncInFunc() on in->buffer and in->raw.
 */
10553 } else if (ctxt->instate != XML_PARSER_EOF) {
10554 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10555 xmlParserInputBufferPtr in = ctxt->input->buf;
10556 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10557 (in->raw != NULL)) {
10560 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10563 xmlGenericError(xmlGenericErrorContext,
10564 "xmlParseChunk: encoder error\n");
10565 return(XML_ERR_INVALID_ENCODING);
/* Parse as much as possible of what is buffered. */
10570 xmlParseTryOrFinish(ctxt, terminate);
/* Now push the carriage return flagged by end_in_lf. */
10571 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10572 (ctxt->input->buf != NULL)) {
10573 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10575 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10576 return(ctxt->errNo);
10579 * Check for termination
/* avail = bytes between the current position and the end of the data. */
10583 if (ctxt->input != NULL) {
10584 if (ctxt->input->buf == NULL)
10585 avail = ctxt->input->length -
10586 (ctxt->input->cur - ctxt->input->base);
10588 avail = ctxt->input->buf->buffer->use -
10589 (ctxt->input->cur - ctxt->input->base);
/* Ending in any state other than EOF or EPILOG raises XML_ERR_DOCUMENT_END. */
10592 if ((ctxt->instate != XML_PARSER_EOF) &&
10593 (ctxt->instate != XML_PARSER_EPILOG)) {
10594 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
/* So does unread data left while in the EPILOG state. */
10596 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
10597 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
/* Call the endDocument callback unless the state is already EOF. */
10599 if (ctxt->instate != XML_PARSER_EOF) {
10600 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10601 ctxt->sax->endDocument(ctxt->userData);
10603 ctxt->instate = XML_PARSER_EOF;
10605 return((xmlParserErrors) ctxt->errNo);
10608 /************************************************************************
10610 * I/O front end functions to the parser *
10612 ************************************************************************/
10615 * xmlCreatePushParserCtxt:
10616 * @sax: a SAX handler
10617 * @user_data: The user data returned on SAX callbacks
10618 * @chunk: a pointer to an array of chars
10619 * @size: number of chars in the array
10620 * @filename: an optional file name or URI
10622 * Create a parser context for using the XML parser in push mode.
10623 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
10624 * the encoding. The remaining characters will be parsed so they
10625 * don't need to be fed in again through xmlParseChunk.
10626 * To allow content encoding detection, @size should be >= 4
10627 * The value of @filename is used for fetching external entities
10628 * and error/warning reports.
10630 * Returns the new parser context or NULL
/*
 * xmlCreatePushParserCtxt: build a parser context for push mode.
 *
 * Steps visible here: detect the encoding from @chunk when at least 4
 * bytes are supplied, allocate an input buffer for that encoding, create
 * the context and its pushTab, replace the context SAX handler with a
 * private copy of @sax, derive ctxt->directory from @filename, create an
 * input stream bound to the buffer and push it. If @chunk is NULL or
 * @size is 0 the charset is set to XML_CHAR_ENCODING_NONE; otherwise the
 * chunk is pushed into the buffer and the input pointers are rebuilt from
 * saved offsets. Finally the detected encoding, if any, is switched on.
 * Failure paths free the buffer and, once it exists, the context.
 */
10634 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10635 const char *chunk, int size, const char *filename) {
10636 xmlParserCtxtPtr ctxt;
10637 xmlParserInputPtr inputStream;
10638 xmlParserInputBufferPtr buf;
10639 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10642 * plug some encoding conversion routines
/* Detection is only attempted with at least 4 bytes of data. */
10644 if ((chunk != NULL) && (size >= 4))
10645 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10647 buf = xmlAllocParserInputBuffer(enc);
10648 if (buf == NULL) return(NULL);
10650 ctxt = xmlNewParserCtxt();
10651 if (ctxt == NULL) {
10652 xmlErrMemory(NULL, "creating parser: out of memory\n");
10653 xmlFreeParserInputBuffer(buf);
10656 ctxt->dictNames = 1;
/* pushTab gets room for three pointers per nameMax entry. */
10657 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10658 if (ctxt->pushTab == NULL) {
10659 xmlErrMemory(ctxt, NULL);
10660 xmlFreeParserInputBuffer(buf);
10661 xmlFreeParserCtxt(ctxt);
/*
 * Replace the handler created with the context by a private copy of @sax.
 * With SAX1 enabled the old handler is not freed when it is the global
 * default handler.
 * NOTE(review): @sax is dereferenced below; a NULL guard is presumably
 * present around this section - confirm.
 */
10665 #ifdef LIBXML_SAX1_ENABLED
10666 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10667 #endif /* LIBXML_SAX1_ENABLED */
10668 xmlFree(ctxt->sax);
10669 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10670 if (ctxt->sax == NULL) {
10671 xmlErrMemory(ctxt, NULL);
10672 xmlFreeParserInputBuffer(buf);
10673 xmlFreeParserCtxt(ctxt);
/* XML_SAX2_MAGIC selects a full copy; otherwise only sizeof(xmlSAXHandlerV1) bytes are copied. */
10676 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10677 if (sax->initialized == XML_SAX2_MAGIC)
10678 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10680 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10681 if (user_data != NULL)
10682 ctxt->userData = user_data;
10684 if (filename == NULL) {
10685 ctxt->directory = NULL;
10687 ctxt->directory = xmlParserGetDirectory(filename);
10690 inputStream = xmlNewInputStream(ctxt);
10691 if (inputStream == NULL) {
10692 xmlFreeParserCtxt(ctxt);
10693 xmlFreeParserInputBuffer(buf);
/* The stream filename is the canonic form of @filename, or NULL. */
10697 if (filename == NULL)
10698 inputStream->filename = NULL;
10700 inputStream->filename = (char *)
10701 xmlCanonicPath((const xmlChar *) filename);
10702 if (inputStream->filename == NULL) {
10703 xmlFreeParserCtxt(ctxt);
10704 xmlFreeParserInputBuffer(buf);
/* Bind the buffer to the stream; base and cur both start at the buffer content. */
10708 inputStream->buf = buf;
10709 inputStream->base = inputStream->buf->buffer->content;
10710 inputStream->cur = inputStream->buf->buffer->content;
10712 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
10714 inputPush(ctxt, inputStream);
10717 * If the caller didn't provide an initial 'chunk' for determining
10718 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10719 * that it can be automatically determined later
10721 if ((size == 0) || (chunk == NULL)) {
10722 ctxt->charset = XML_CHAR_ENCODING_NONE;
10723 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
10724 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10725 int cur = ctxt->input->cur - ctxt->input->base;
10727 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10729 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10730 ctxt->input->cur = ctxt->input->base + cur;
10732 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10734 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10738 if (enc != XML_CHAR_ENCODING_NONE) {
10739 xmlSwitchEncoding(ctxt, enc);
10744 #endif /* LIBXML_PUSH_ENABLED */
10748 * @ctxt: an XML parser context
10750 * Blocks further parser processing
/*
 * xmlStopParser: put @ctxt in the XML_PARSER_EOF state with SAX
 * callbacks disabled, and point the current input at an empty string.
 */
10753 xmlStopParser(xmlParserCtxtPtr ctxt) {
10756 ctxt->instate = XML_PARSER_EOF;
10757 ctxt->disableSAX = 1;
/* cur and base both point at an empty string, leaving no input to read. */
10758 if (ctxt->input != NULL) {
10759 ctxt->input->cur = BAD_CAST"";
10760 ctxt->input->base = ctxt->input->cur;
10765 * xmlCreateIOParserCtxt:
10766 * @sax: a SAX handler
10767 * @user_data: The user data returned on SAX callbacks
10768 * @ioread: an I/O read function
10769 * @ioclose: an I/O close function
10770 * @ioctx: an I/O handler
10771 * @enc: the charset encoding if known
10773 * Create a parser context for using the XML parser with an existing
10776 * Returns the new parser context or NULL
/*
 * xmlCreateIOParserCtxt: build a parser context reading through the
 * @ioread and @ioclose callbacks on @ioctx.
 *
 * Returns NULL when @ioread is NULL or the input buffer cannot be
 * created. Otherwise creates the context, replaces its SAX handler with
 * a private copy of @sax, creates an I/O input stream for the buffer with
 * encoding @enc and pushes it on the context.
 */
10779 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10780 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10781 void *ioctx, xmlCharEncoding enc) {
10782 xmlParserCtxtPtr ctxt;
10783 xmlParserInputPtr inputStream;
10784 xmlParserInputBufferPtr buf;
/* A read callback is mandatory. */
10786 if (ioread == NULL) return(NULL);
10788 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10789 if (buf == NULL) return(NULL);
10791 ctxt = xmlNewParserCtxt();
10792 if (ctxt == NULL) {
10793 xmlFreeParserInputBuffer(buf);
/*
 * Replace the handler created with the context by a private copy of @sax.
 * NOTE(review): @sax is dereferenced below; a NULL guard is presumably
 * present around this section - confirm.
 */
10797 #ifdef LIBXML_SAX1_ENABLED
10798 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10799 #endif /* LIBXML_SAX1_ENABLED */
10800 xmlFree(ctxt->sax);
10801 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10802 if (ctxt->sax == NULL) {
10803 xmlErrMemory(ctxt, NULL);
10804 xmlFreeParserCtxt(ctxt);
/* XML_SAX2_MAGIC selects a full copy; otherwise only sizeof(xmlSAXHandlerV1) bytes are copied. */
10807 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10808 if (sax->initialized == XML_SAX2_MAGIC)
10809 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10811 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10812 if (user_data != NULL)
10813 ctxt->userData = user_data;
/* Wrap the buffer in an input stream and make it the current input. */
10816 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10817 if (inputStream == NULL) {
10818 xmlFreeParserCtxt(ctxt);
10821 inputPush(ctxt, inputStream);
10826 #ifdef LIBXML_VALID_ENABLED
10827 /************************************************************************
10829 * Front ends when parsing a DTD *
10831 ************************************************************************/
10835 * @sax: the SAX handler block or NULL
10836 * @input: an Input Buffer
10837 * @enc: the charset encoding if known
10839 * Load and parse a DTD
10841 * Returns the resulting xmlDtdPtr or NULL in case of error.
10842 * @input will be freed by the function in any case.
/*
 * xmlIOParseDTD: parse the DTD supplied by @input as an external subset.
 *
 * A temporary parser context and a temporary document holding an
 * external subset named none are created; xmlParseExternalSubset() fills
 * that subset. When the parse is well formed the subset is detached from
 * the temporary document and becomes the return value. The temporary
 * document and context are freed before returning.
 * @enc, when not XML_CHAR_ENCODING_NONE, is applied with
 * xmlSwitchEncoding(); otherwise the encoding is detected from the first
 * 4 bytes when that many are available.
 */
10846 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10847 xmlCharEncoding enc) {
10848 xmlDtdPtr ret = NULL;
10849 xmlParserCtxtPtr ctxt;
10850 xmlParserInputPtr pinput = NULL;
10856 ctxt = xmlNewParserCtxt();
10857 if (ctxt == NULL) {
10858 xmlFreeParserInputBuffer(input);
10863 * Set-up the SAX context
/* NOTE(review): presumably the handler of the context is replaced by @sax here when one is given - confirm. */
10866 if (ctxt->sax != NULL)
10867 xmlFree(ctxt->sax);
10869 ctxt->userData = ctxt;
10871 xmlDetectSAX2(ctxt);
10874 * generate a parser input from the I/O handler
10877 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
10878 if (pinput == NULL) {
/* ctxt->sax is cleared before the context is freed when the caller supplied @sax. */
10879 if (sax != NULL) ctxt->sax = NULL;
10880 xmlFreeParserInputBuffer(input);
10881 xmlFreeParserCtxt(ctxt);
10886 * plug some encoding conversion routines here.
10888 xmlPushInput(ctxt, pinput);
10889 if (enc != XML_CHAR_ENCODING_NONE) {
10890 xmlSwitchEncoding(ctxt, enc);
/* Reset the stream bookkeeping to the current read position. */
10893 pinput->filename = NULL;
10896 pinput->base = ctxt->input->cur;
10897 pinput->cur = ctxt->input->cur;
10898 pinput->free = NULL;
10901 * let's parse that entity knowing it's an external subset.
10903 ctxt->inSubset = 2;
10904 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10905 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10906 BAD_CAST "none", BAD_CAST "none");
10908 if ((enc == XML_CHAR_ENCODING_NONE) &&
10909 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10911 * Get the 4 first bytes and decode the charset
10912 * if enc != XML_CHAR_ENCODING_NONE
10913 * plug some encoding conversion routines.
10919 enc = xmlDetectCharEncoding(start, 4);
10920 if (enc != XML_CHAR_ENCODING_NONE) {
10921 xmlSwitchEncoding(ctxt, enc);
10925 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
/* On success take the subset out of the temporary document so that xmlFreeDoc() below does not free it. */
10927 if (ctxt->myDoc != NULL) {
10928 if (ctxt->wellFormed) {
10929 ret = ctxt->myDoc->extSubset;
10930 ctxt->myDoc->extSubset = NULL;
/* Walk the children of the detached subset. */
10935 tmp = ret->children;
10936 while (tmp != NULL) {
10944 xmlFreeDoc(ctxt->myDoc);
10945 ctxt->myDoc = NULL;
10947 if (sax != NULL) ctxt->sax = NULL;
10948 xmlFreeParserCtxt(ctxt);
10955 * @sax: the SAX handler block
10956 * @ExternalID: a NAME* containing the External ID of the DTD
10957 * @SystemID: a NAME* containing the URL to the DTD
10959 * Load and parse an external subset.
10961 * Returns the resulting xmlDtdPtr or NULL in case of error.
/*
 * xmlSAXParseDTD: load the DTD identified by @ExternalID and @SystemID
 * through the resolveEntity SAX callback and parse it as an external
 * subset.
 *
 * Returns NULL when both identifiers are NULL. The system identifier is
 * canonicalised with xmlCanonicPath(). A temporary document holding an
 * external subset named none is created; when the parse is well formed
 * the subset is detached from it and becomes the return value. The
 * temporary document and context are freed before returning.
 */
10965 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10966 const xmlChar *SystemID) {
10967 xmlDtdPtr ret = NULL;
10968 xmlParserCtxtPtr ctxt;
10969 xmlParserInputPtr input = NULL;
10970 xmlCharEncoding enc;
10971 xmlChar* systemIdCanonic;
10973 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10975 ctxt = xmlNewParserCtxt();
10976 if (ctxt == NULL) {
10981 * Set-up the SAX context
/* NOTE(review): presumably the handler of the context is replaced by @sax here when one is given - confirm. */
10984 if (ctxt->sax != NULL)
10985 xmlFree(ctxt->sax);
10987 ctxt->userData = ctxt;
10991 * Canonicalise the system ID
/* A NULL result for a non-NULL @SystemID is treated as a failure. */
10993 systemIdCanonic = xmlCanonicPath(SystemID);
10994 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
10995 xmlFreeParserCtxt(ctxt);
11000 * Ask the Entity resolver to load the damn thing
11003 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11004 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
/* Nothing could be loaded: release the context and the canonic identifier. */
11006 if (input == NULL) {
11007 if (sax != NULL) ctxt->sax = NULL;
11008 xmlFreeParserCtxt(ctxt);
11009 if (systemIdCanonic != NULL)
11010 xmlFree(systemIdCanonic);
11015 * plug some encoding conversion routines here.
/*
 * NOTE(review): the detected encoding is passed to xmlSwitchEncoding()
 * without the XML_CHAR_ENCODING_NONE test used by sibling functions -
 * confirm this is intended.
 */
11017 xmlPushInput(ctxt, input);
11018 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11019 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11020 xmlSwitchEncoding(ctxt, enc);
/* The canonic identifier becomes the input filename when none is set; otherwise it is freed. */
11023 if (input->filename == NULL)
11024 input->filename = (char *) systemIdCanonic;
11026 xmlFree(systemIdCanonic);
11029 input->base = ctxt->input->cur;
11030 input->cur = ctxt->input->cur;
11031 input->free = NULL;
11034 * let's parse that entity knowing it's an external subset.
11036 ctxt->inSubset = 2;
11037 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11038 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11039 ExternalID, SystemID);
11040 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
/* On success take the subset out of the temporary document so that xmlFreeDoc() below does not free it. */
11042 if (ctxt->myDoc != NULL) {
11043 if (ctxt->wellFormed) {
11044 ret = ctxt->myDoc->extSubset;
11045 ctxt->myDoc->extSubset = NULL;
/* Walk the children of the detached subset. */
11050 tmp = ret->children;
11051 while (tmp != NULL) {
11059 xmlFreeDoc(ctxt->myDoc);
11060 ctxt->myDoc = NULL;
11062 if (sax != NULL) ctxt->sax = NULL;
11063 xmlFreeParserCtxt(ctxt);
11071 * @ExternalID: a NAME* containing the External ID of the DTD
11072 * @SystemID: a NAME* containing the URL to the DTD
11074 * Load and parse an external subset.
11076 * Returns the resulting xmlDtdPtr or NULL in case of error.
/* xmlParseDTD: convenience wrapper calling xmlSAXParseDTD() with a NULL SAX handler. */
11080 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11081 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11083 #endif /* LIBXML_VALID_ENABLED */
11085 /************************************************************************
11087 * Front ends when parsing an Entity *
11089 ************************************************************************/
11092 * xmlParseCtxtExternalEntity:
11093 * @ctx: the existing parsing context
11094 * @URL: the URL for the entity to load
11095 * @ID: the System ID for the entity to load
11096 * @lst: the return value for the set of parsed nodes
11098 * Parse an external general entity within an existing parsing context
11099 * An external general parsed entity is well-formed if it matches the
11100 * production labeled extParsedEnt.
11102 * [78] extParsedEnt ::= TextDecl? content
11104 * Returns 0 if the entity is well formed, -1 in case of args problem and
11105 * the parser error code otherwise
/*
 * xmlParseCtxtExternalEntity: parse the external entity designated by
 * @URL and @ID with a fresh parser context that borrows state from @ctx.
 *
 * Returns -1 when @ctx is NULL and XML_ERR_ENTITY_LOOP when ctx->depth
 * exceeds 40. The new context uses the SAX handler, dictionary, default
 * and special attribute tables and several option flags of @ctx, and
 * parses at depth ctx->depth + 1. Content is parsed under a temporary
 * pseudoroot element of a temporary document which shares the dictionary
 * and the internal and external subsets of ctx->myDoc.
 *
 * After xmlParseContent() the validate and valid flags are copied back
 * to @ctx, leftover input or an unbalanced node stack raises a fatal
 * error, and the parsed nodes are unlinked from the pseudoroot. Before
 * the temporary context and document are freed, the borrowed SAX
 * handler, attribute tables and subsets are detached from them.
 * NOTE(review): ctxt->dict is set to ctx->dict; confirm it is also
 * detached before xmlFreeParserCtxt() is called.
 */
11109 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
11110 const xmlChar *ID, xmlNodePtr *lst) {
11111 xmlParserCtxtPtr ctxt;
11113 xmlNodePtr newRoot;
11114 xmlSAXHandlerPtr oldsax = NULL;
11117 xmlCharEncoding enc;
11118 xmlParserInputPtr inputStream;
11119 char *directory = NULL;
11121 if (ctx == NULL) return(-1);
/* Nesting guard against entity loops. */
11123 if (ctx->depth > 40) {
11124 return(XML_ERR_ENTITY_LOOP);
11129 if ((URL == NULL) && (ID == NULL))
11131 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11134 ctxt = xmlNewParserCtxt();
11135 if (ctxt == NULL) {
11139 ctxt->userData = ctxt;
11140 ctxt->_private = ctx->_private;
11142 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11143 if (inputStream == NULL) {
11144 xmlFreeParserCtxt(ctxt);
11148 inputPush(ctxt, inputStream);
/* Derive the directory from @URL when the context has none. */
11150 if ((ctxt->directory == NULL) && (directory == NULL))
11151 directory = xmlParserGetDirectory((char *)URL);
11152 if ((ctxt->directory == NULL) && (directory != NULL))
11153 ctxt->directory = directory;
/* Borrow the SAX handler of @ctx; the original one is kept in oldsax and restored before the context is freed. */
11155 oldsax = ctxt->sax;
11156 ctxt->sax = ctx->sax;
11157 xmlDetectSAX2(ctxt);
11158 newDoc = xmlNewDoc(BAD_CAST "1.0");
11159 if (newDoc == NULL) {
11160 xmlFreeParserCtxt(ctxt);
/* The temporary document shares the dictionary (with an added reference) and the subsets of ctx->myDoc. */
11163 if (ctx->myDoc->dict) {
11164 newDoc->dict = ctx->myDoc->dict;
11165 xmlDictReference(newDoc->dict);
11167 if (ctx->myDoc != NULL) {
11168 newDoc->intSubset = ctx->myDoc->intSubset;
11169 newDoc->extSubset = ctx->myDoc->extSubset;
11171 if (ctx->myDoc->URL != NULL) {
11172 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
/* Parsed content is collected under a temporary pseudoroot element. */
11174 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11175 if (newRoot == NULL) {
11176 ctxt->sax = oldsax;
11177 xmlFreeParserCtxt(ctxt);
11178 newDoc->intSubset = NULL;
11179 newDoc->extSubset = NULL;
11180 xmlFreeDoc(newDoc);
11183 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11184 nodePush(ctxt, newDoc->children);
11185 if (ctx->myDoc == NULL) {
11186 ctxt->myDoc = newDoc;
11188 ctxt->myDoc = ctx->myDoc;
11189 newDoc->children->doc = ctx->myDoc;
11193 * Get the 4 first bytes and decode the charset
11194 * if enc != XML_CHAR_ENCODING_NONE
11195 * plug some encoding conversion routines.
11198 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11203 enc = xmlDetectCharEncoding(start, 4);
11204 if (enc != XML_CHAR_ENCODING_NONE) {
11205 xmlSwitchEncoding(ctxt, enc);
11210 * Parse a possible text declaration first
11212 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11213 xmlParseTextDecl(ctxt);
11217 * Doing validity checking on chunk doesn't make sense
11219 ctxt->instate = XML_PARSER_CONTENT;
11220 ctxt->validate = ctx->validate;
11221 ctxt->valid = ctx->valid;
11222 ctxt->loadsubset = ctx->loadsubset;
11223 ctxt->depth = ctx->depth + 1;
11224 ctxt->replaceEntities = ctx->replaceEntities;
11225 if (ctxt->validate) {
11226 ctxt->vctxt.error = ctx->vctxt.error;
11227 ctxt->vctxt.warning = ctx->vctxt.warning;
11229 ctxt->vctxt.error = NULL;
11230 ctxt->vctxt.warning = NULL;
11232 ctxt->vctxt.nodeTab = NULL;
11233 ctxt->vctxt.nodeNr = 0;
11234 ctxt->vctxt.nodeMax = 0;
11235 ctxt->vctxt.node = NULL;
11236 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11237 ctxt->dict = ctx->dict;
11238 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11239 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11240 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11241 ctxt->dictNames = ctx->dictNames;
11242 ctxt->attsDefault = ctx->attsDefault;
11243 ctxt->attsSpecial = ctx->attsSpecial;
11244 ctxt->linenumbers = ctx->linenumbers;
11246 xmlParseContent(ctxt);
11248 ctx->validate = ctxt->validate;
11249 ctx->valid = ctxt->valid;
11250 if ((RAW == '<') && (NXT(1) == '/')) {
11251 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11252 } else if (RAW != 0) {
11253 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11255 if (ctxt->node != newDoc->children) {
11256 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11259 if (!ctxt->wellFormed) {
11260 if (ctxt->errNo == 0)
11269 * Return the newly created nodeset after unlinking it from
11270 * they pseudo parent.
11272 cur = newDoc->children->children;
11274 while (cur != NULL) {
11275 cur->parent = NULL;
11278 newDoc->children->children = NULL;
11282 ctxt->sax = oldsax;
11284 ctxt->attsDefault = NULL;
11285 ctxt->attsSpecial = NULL;
11286 xmlFreeParserCtxt(ctxt);
11287 newDoc->intSubset = NULL;
11288 newDoc->extSubset = NULL;
11289 xmlFreeDoc(newDoc);
11295 * xmlParseExternalEntityPrivate:
11296 * @doc: the document the chunk pertains to
11297 * @oldctxt: the previous parser context if available
11298 * @sax: the SAX handler block (possibly NULL)
11299 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11300 * @depth: Used for loop detection, use 0
11301 * @URL: the URL for the entity to load
11302 * @ID: the System ID for the entity to load
11303 * @list: the return value for the set of parsed nodes
11305 * Private version of xmlParseExternalEntity()
11307 * Returns 0 if the entity is well formed, -1 in case of args problem and
11308 * the parser error code otherwise
/*
 * xmlParseExternalEntityPrivate: parse the external entity designated by
 * @URL and @ID in a context created by xmlCreateEntityParserCtxt().
 *
 * Returns XML_ERR_INTERNAL_ERROR when both @URL and @ID are NULL and
 * XML_WAR_UNDECLARED_ENTITY when the entity context cannot be created.
 * When @oldctxt is given, its _private, loadsubset, validate, external
 * and record_info settings and its node_seq are copied into the new
 * context; otherwise validation and subset loading are turned off.
 * Content is parsed at depth @depth under a temporary pseudoroot element
 * of a temporary document sharing the dictionary and subsets of @doc.
 *
 * The result is XML_ERR_OK, or for a non well formed entity ctxt->errNo
 * (XML_ERR_INTERNAL_ERROR when errNo is 0). On success, when @list is
 * not NULL, the parsed nodes are unlinked from the pseudoroot. node_seq
 * is handed back to @oldctxt and zeroed in the temporary context before
 * that context is freed.
 *
 * NOTE(review): the final writes to oldctxt->node_seq have no visible
 * NULL test, although @oldctxt is tested against NULL earlier in this
 * function and xmlParseExternalEntity() passes NULL - confirm and guard.
 */
11311 static xmlParserErrors
11312 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11313 xmlSAXHandlerPtr sax,
11314 void *user_data, int depth, const xmlChar *URL,
11315 const xmlChar *ID, xmlNodePtr *list) {
11316 xmlParserCtxtPtr ctxt;
11318 xmlNodePtr newRoot;
11319 xmlSAXHandlerPtr oldsax = NULL;
11320 xmlParserErrors ret = XML_ERR_OK;
11322 xmlCharEncoding enc;
11325 return(XML_ERR_ENTITY_LOOP);
11332 if ((URL == NULL) && (ID == NULL))
11333 return(XML_ERR_INTERNAL_ERROR);
11335 return(XML_ERR_INTERNAL_ERROR);
11338 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11339 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11340 ctxt->userData = ctxt;
/* Inherit parsing options and the node_seq record from the calling context when there is one. */
11341 if (oldctxt != NULL) {
11342 ctxt->_private = oldctxt->_private;
11343 ctxt->loadsubset = oldctxt->loadsubset;
11344 ctxt->validate = oldctxt->validate;
11345 ctxt->external = oldctxt->external;
11346 ctxt->record_info = oldctxt->record_info;
11347 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11348 ctxt->node_seq.length = oldctxt->node_seq.length;
11349 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
11352 * Doing validity checking on chunk without context
11353 * doesn't make sense
11355 ctxt->_private = NULL;
11356 ctxt->validate = 0;
11357 ctxt->external = 2;
11358 ctxt->loadsubset = 0;
11361 oldsax = ctxt->sax;
11363 if (user_data != NULL)
11364 ctxt->userData = user_data;
11366 xmlDetectSAX2(ctxt);
11367 newDoc = xmlNewDoc(BAD_CAST "1.0");
11368 if (newDoc == NULL) {
11369 ctxt->node_seq.maximum = 0;
11370 ctxt->node_seq.length = 0;
11371 ctxt->node_seq.buffer = NULL;
11372 xmlFreeParserCtxt(ctxt);
11373 return(XML_ERR_INTERNAL_ERROR);
11375 newDoc->intSubset = doc->intSubset;
11376 newDoc->extSubset = doc->extSubset;
11377 newDoc->dict = doc->dict;
11378 xmlDictReference(newDoc->dict);
11380 if (doc->URL != NULL) {
11381 newDoc->URL = xmlStrdup(doc->URL);
11383 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11384 if (newRoot == NULL) {
11386 ctxt->sax = oldsax;
11387 ctxt->node_seq.maximum = 0;
11388 ctxt->node_seq.length = 0;
11389 ctxt->node_seq.buffer = NULL;
11390 xmlFreeParserCtxt(ctxt);
11391 newDoc->intSubset = NULL;
11392 newDoc->extSubset = NULL;
11393 xmlFreeDoc(newDoc);
11394 return(XML_ERR_INTERNAL_ERROR);
11396 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11397 nodePush(ctxt, newDoc->children);
11399 newRoot->doc = doc;
11402 * Get the 4 first bytes and decode the charset
11403 * if enc != XML_CHAR_ENCODING_NONE
11404 * plug some encoding conversion routines.
11407 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11412 enc = xmlDetectCharEncoding(start, 4);
11413 if (enc != XML_CHAR_ENCODING_NONE) {
11414 xmlSwitchEncoding(ctxt, enc);
11419 * Parse a possible text declaration first
11421 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11422 xmlParseTextDecl(ctxt);
11425 ctxt->instate = XML_PARSER_CONTENT;
11426 ctxt->depth = depth;
11428 xmlParseContent(ctxt);
11430 if ((RAW == '<') && (NXT(1) == '/')) {
11431 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11432 } else if (RAW != 0) {
11433 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11435 if (ctxt->node != newDoc->children) {
11436 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11439 if (!ctxt->wellFormed) {
11440 if (ctxt->errNo == 0)
11441 ret = XML_ERR_INTERNAL_ERROR;
11443 ret = (xmlParserErrors)ctxt->errNo;
11445 if (list != NULL) {
11449 * Return the newly created nodeset after unlinking it from
11450 * they pseudo parent.
11452 cur = newDoc->children->children;
11454 while (cur != NULL) {
11455 cur->parent = NULL;
11458 newDoc->children->children = NULL;
11463 ctxt->sax = oldsax;
11464 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11465 oldctxt->node_seq.length = ctxt->node_seq.length;
11466 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
11467 ctxt->node_seq.maximum = 0;
11468 ctxt->node_seq.length = 0;
11469 ctxt->node_seq.buffer = NULL;
11470 xmlFreeParserCtxt(ctxt);
11471 newDoc->intSubset = NULL;
11472 newDoc->extSubset = NULL;
11473 xmlFreeDoc(newDoc);
11478 #ifdef LIBXML_SAX1_ENABLED
11480 * xmlParseExternalEntity:
11481 * @doc: the document the chunk pertains to
11482 * @sax: the SAX handler block (possibly NULL)
11483 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11484 * @depth: Used for loop detection, use 0
11485 * @URL: the URL for the entity to load
11486 * @ID: the System ID for the entity to load
11487 * @lst: the return value for the set of parsed nodes
11489 * Parse an external general entity
11490 * An external general parsed entity is well-formed if it matches the
11491 * production labeled extParsedEnt.
11493 * [78] extParsedEnt ::= TextDecl? content
11495 * Returns 0 if the entity is well formed, -1 in case of args problem and
11496 * the parser error code otherwise
11500 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
11501 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
11502 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
11507 * xmlParseBalancedChunkMemory:
11508 * @doc: the document the chunk pertains to
11509 * @sax: the SAX handler bloc (possibly NULL)
11510 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11511 * @depth: Used for loop detection, use 0
11512 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11513 * @lst: the return value for the set of parsed nodes
11515 * Parse a well-balanced chunk of an XML document
11516 * called by the parser
11517 * The allowed sequence for the Well Balanced Chunk is the one defined by
11518 * the content production in the XML grammar:
11520 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11522 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11523 * the parser error code otherwise
11527 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11528 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
11529 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11530 depth, string, lst, 0 );
11532 #endif /* LIBXML_SAX1_ENABLED */
11535 * xmlParseBalancedChunkMemoryInternal:
11536 * @oldctxt: the existing parsing context
11537 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11538 * @user_data: the user data field for the parser context
11539 * @lst: the return value for the set of parsed nodes
11542 * Parse a well-balanced chunk of an XML document
11543 * called by the parser
11544 * The allowed sequence for the Well Balanced Chunk is the one defined by
11545 * the content production in the XML grammar:
11547 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11549 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11550 * error code otherwise
11552 * In case recover is set to 1, the nodelist will not be empty even if
11553 * the parsed chunk is not well balanced.
11555 static xmlParserErrors
11556 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11557 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11558 xmlParserCtxtPtr ctxt;
11559 xmlDocPtr newDoc = NULL;
11560 xmlNodePtr newRoot;
11561 xmlSAXHandlerPtr oldsax = NULL;
11562 xmlNodePtr content = NULL;
11563 xmlNodePtr last = NULL;
11565 xmlParserErrors ret = XML_ERR_OK;
11567 if (oldctxt->depth > 40) {
11568 return(XML_ERR_ENTITY_LOOP);
11574 if (string == NULL)
11575 return(XML_ERR_INTERNAL_ERROR);
11577 size = xmlStrlen(string);
11579 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11580 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11581 if (user_data != NULL)
11582 ctxt->userData = user_data;
11584 ctxt->userData = ctxt;
11585 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11586 ctxt->dict = oldctxt->dict;
11587 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11588 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11589 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11591 oldsax = ctxt->sax;
11592 ctxt->sax = oldctxt->sax;
11593 xmlDetectSAX2(ctxt);
11594 ctxt->replaceEntities = oldctxt->replaceEntities;
11595 ctxt->options = oldctxt->options;
11597 ctxt->_private = oldctxt->_private;
11598 if (oldctxt->myDoc == NULL) {
11599 newDoc = xmlNewDoc(BAD_CAST "1.0");
11600 if (newDoc == NULL) {
11601 ctxt->sax = oldsax;
11603 xmlFreeParserCtxt(ctxt);
11604 return(XML_ERR_INTERNAL_ERROR);
11606 newDoc->dict = ctxt->dict;
11607 xmlDictReference(newDoc->dict);
11608 ctxt->myDoc = newDoc;
11610 ctxt->myDoc = oldctxt->myDoc;
11611 content = ctxt->myDoc->children;
11612 last = ctxt->myDoc->last;
11614 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11615 if (newRoot == NULL) {
11616 ctxt->sax = oldsax;
11618 xmlFreeParserCtxt(ctxt);
11619 if (newDoc != NULL) {
11620 xmlFreeDoc(newDoc);
11622 return(XML_ERR_INTERNAL_ERROR);
11624 ctxt->myDoc->children = NULL;
11625 ctxt->myDoc->last = NULL;
11626 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
11627 nodePush(ctxt, ctxt->myDoc->children);
11628 ctxt->instate = XML_PARSER_CONTENT;
11629 ctxt->depth = oldctxt->depth + 1;
11631 ctxt->validate = 0;
11632 ctxt->loadsubset = oldctxt->loadsubset;
11633 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11635 * ID/IDREF registration will be done in xmlValidateElement below
11637 ctxt->loadsubset |= XML_SKIP_IDS;
11639 ctxt->dictNames = oldctxt->dictNames;
11640 ctxt->attsDefault = oldctxt->attsDefault;
11641 ctxt->attsSpecial = oldctxt->attsSpecial;
11643 xmlParseContent(ctxt);
11644 if ((RAW == '<') && (NXT(1) == '/')) {
11645 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11646 } else if (RAW != 0) {
11647 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11649 if (ctxt->node != ctxt->myDoc->children) {
11650 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11653 if (!ctxt->wellFormed) {
11654 if (ctxt->errNo == 0)
11655 ret = XML_ERR_INTERNAL_ERROR;
11657 ret = (xmlParserErrors)ctxt->errNo;
11662 if ((lst != NULL) && (ret == XML_ERR_OK)) {
11666 * Return the newly created nodeset after unlinking it from
11667 * they pseudo parent.
11669 cur = ctxt->myDoc->children->children;
11671 while (cur != NULL) {
11672 #ifdef LIBXML_VALID_ENABLED
11673 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11674 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11675 (cur->type == XML_ELEMENT_NODE)) {
11676 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11677 oldctxt->myDoc, cur);
11679 #endif /* LIBXML_VALID_ENABLED */
11680 cur->parent = NULL;
11683 ctxt->myDoc->children->children = NULL;
11685 if (ctxt->myDoc != NULL) {
11686 xmlFreeNode(ctxt->myDoc->children);
11687 ctxt->myDoc->children = content;
11688 ctxt->myDoc->last = last;
11691 ctxt->sax = oldsax;
11693 ctxt->attsDefault = NULL;
11694 ctxt->attsSpecial = NULL;
11695 xmlFreeParserCtxt(ctxt);
11696 if (newDoc != NULL) {
11697 xmlFreeDoc(newDoc);
11704 * xmlParseInNodeContext:
11705 * @node: the context node
11706 * @data: the input string
11707 * @datalen: the input string length in bytes
11708 * @options: a combination of xmlParserOption
11709 * @lst: the return value for the set of parsed nodes
11711 * Parse a well-balanced chunk of an XML document
11712 * within the context (DTD, namespaces, etc ...) of the given node.
11714 * The allowed sequence for the data is a Well Balanced Chunk defined by
11715 * the content production in the XML grammar:
11717 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11719 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11720 * error code otherwise
11723 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11724 int options, xmlNodePtr *lst) {
11726 xmlParserCtxtPtr ctxt;
11727 xmlDocPtr doc = NULL;
11728 xmlNodePtr fake, cur;
11731 xmlParserErrors ret = XML_ERR_OK;
11734 * check all input parameters, grab the document
11736 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11737 return(XML_ERR_INTERNAL_ERROR);
11738 switch (node->type) {
11739 case XML_ELEMENT_NODE:
11740 case XML_ATTRIBUTE_NODE:
11741 case XML_TEXT_NODE:
11742 case XML_CDATA_SECTION_NODE:
11743 case XML_ENTITY_REF_NODE:
11745 case XML_COMMENT_NODE:
11746 case XML_DOCUMENT_NODE:
11747 case XML_HTML_DOCUMENT_NODE:
11750 return(XML_ERR_INTERNAL_ERROR);
11753 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11754 (node->type != XML_DOCUMENT_NODE) &&
11755 (node->type != XML_HTML_DOCUMENT_NODE))
11756 node = node->parent;
11758 return(XML_ERR_INTERNAL_ERROR);
11759 if (node->type == XML_ELEMENT_NODE)
11762 doc = (xmlDocPtr) node;
11764 return(XML_ERR_INTERNAL_ERROR);
11767 * allocate a context and set-up everything not related to the
11768 * node position in the tree
11770 if (doc->type == XML_DOCUMENT_NODE)
11771 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11772 #ifdef LIBXML_HTML_ENABLED
11773 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11774 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11777 return(XML_ERR_INTERNAL_ERROR);
11780 return(XML_ERR_NO_MEMORY);
11781 fake = xmlNewComment(NULL);
11782 if (fake == NULL) {
11783 xmlFreeParserCtxt(ctxt);
11784 return(XML_ERR_NO_MEMORY);
11786 xmlAddChild(node, fake);
11789 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11790 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11791 * we must wait until the last moment to free the original one.
11793 if (doc->dict != NULL) {
11794 if (ctxt->dict != NULL)
11795 xmlDictFree(ctxt->dict);
11796 ctxt->dict = doc->dict;
11798 options |= XML_PARSE_NODICT;
11800 xmlCtxtUseOptions(ctxt, options);
11801 xmlDetectSAX2(ctxt);
11804 if (node->type == XML_ELEMENT_NODE) {
11805 nodePush(ctxt, node);
11807 * initialize the SAX2 namespaces stack
11810 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11811 xmlNsPtr ns = cur->nsDef;
11812 const xmlChar *iprefix, *ihref;
11814 while (ns != NULL) {
11816 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11817 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11819 iprefix = ns->prefix;
11823 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11824 nsPush(ctxt, iprefix, ihref);
11831 ctxt->instate = XML_PARSER_CONTENT;
11834 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11836 * ID/IDREF registration will be done in xmlValidateElement below
11838 ctxt->loadsubset |= XML_SKIP_IDS;
11841 #ifdef LIBXML_HTML_ENABLED
11842 if (doc->type == XML_HTML_DOCUMENT_NODE)
11843 __htmlParseContent(ctxt);
11846 xmlParseContent(ctxt);
11849 if ((RAW == '<') && (NXT(1) == '/')) {
11850 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11851 } else if (RAW != 0) {
11852 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11854 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11855 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11856 ctxt->wellFormed = 0;
11859 if (!ctxt->wellFormed) {
11860 if (ctxt->errNo == 0)
11861 ret = XML_ERR_INTERNAL_ERROR;
11863 ret = (xmlParserErrors)ctxt->errNo;
11869 * Return the newly created nodeset after unlinking it from
11870 * the pseudo sibling.
11883 while (cur != NULL) {
11884 cur->parent = NULL;
11888 xmlUnlinkNode(fake);
11892 if (ret != XML_ERR_OK) {
11893 xmlFreeNodeList(*lst);
11897 if (doc->dict != NULL)
11899 xmlFreeParserCtxt(ctxt);
11903 return(XML_ERR_INTERNAL_ERROR);
11907 #ifdef LIBXML_SAX1_ENABLED
11909 * xmlParseBalancedChunkMemoryRecover:
11910 * @doc: the document the chunk pertains to
11911 * @sax: the SAX handler bloc (possibly NULL)
11912 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11913 * @depth: Used for loop detection, use 0
11914 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11915 * @lst: the return value for the set of parsed nodes
11916 * @recover: return nodes even if the data is broken (use 0)
11919 * Parse a well-balanced chunk of an XML document
11920 * called by the parser
11921 * The allowed sequence for the Well Balanced Chunk is the one defined by
11922 * the content production in the XML grammar:
11924 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11926 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11927 * the parser error code otherwise
11929 * In case recover is set to 1, the nodelist will not be empty even if
11930 * the parsed chunk is not well balanced.
11933 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11934 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11936 xmlParserCtxtPtr ctxt;
11938 xmlSAXHandlerPtr oldsax = NULL;
11939 xmlNodePtr content, newRoot;
11944 return(XML_ERR_ENTITY_LOOP);
11950 if (string == NULL)
11953 size = xmlStrlen(string);
11955 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11956 if (ctxt == NULL) return(-1);
11957 ctxt->userData = ctxt;
11959 oldsax = ctxt->sax;
11961 if (user_data != NULL)
11962 ctxt->userData = user_data;
11964 newDoc = xmlNewDoc(BAD_CAST "1.0");
11965 if (newDoc == NULL) {
11966 xmlFreeParserCtxt(ctxt);
11969 if ((doc != NULL) && (doc->dict != NULL)) {
11970 xmlDictFree(ctxt->dict);
11971 ctxt->dict = doc->dict;
11972 xmlDictReference(ctxt->dict);
11973 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11974 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11975 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11976 ctxt->dictNames = 1;
11978 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11981 newDoc->intSubset = doc->intSubset;
11982 newDoc->extSubset = doc->extSubset;
11984 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11985 if (newRoot == NULL) {
11987 ctxt->sax = oldsax;
11988 xmlFreeParserCtxt(ctxt);
11989 newDoc->intSubset = NULL;
11990 newDoc->extSubset = NULL;
11991 xmlFreeDoc(newDoc);
11994 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11995 nodePush(ctxt, newRoot);
11997 ctxt->myDoc = newDoc;
11999 ctxt->myDoc = newDoc;
12000 newDoc->children->doc = doc;
12001 /* Ensure that doc has XML spec namespace */
12002 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12003 newDoc->oldNs = doc->oldNs;
12005 ctxt->instate = XML_PARSER_CONTENT;
12006 ctxt->depth = depth;
12009 * Doing validity checking on chunk doesn't make sense
12011 ctxt->validate = 0;
12012 ctxt->loadsubset = 0;
12013 xmlDetectSAX2(ctxt);
12015 if ( doc != NULL ){
12016 content = doc->children;
12017 doc->children = NULL;
12018 xmlParseContent(ctxt);
12019 doc->children = content;
12022 xmlParseContent(ctxt);
12024 if ((RAW == '<') && (NXT(1) == '/')) {
12025 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12026 } else if (RAW != 0) {
12027 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12029 if (ctxt->node != newDoc->children) {
12030 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12033 if (!ctxt->wellFormed) {
12034 if (ctxt->errNo == 0)
12042 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12046 * Return the newly created nodeset after unlinking it from
12047 * they pseudo parent.
12049 cur = newDoc->children->children;
12051 while (cur != NULL) {
12052 xmlSetTreeDoc(cur, doc);
12053 cur->parent = NULL;
12056 newDoc->children->children = NULL;
12060 ctxt->sax = oldsax;
12061 xmlFreeParserCtxt(ctxt);
12062 newDoc->intSubset = NULL;
12063 newDoc->extSubset = NULL;
12064 newDoc->oldNs = NULL;
12065 xmlFreeDoc(newDoc);
12071 * xmlSAXParseEntity:
12072 * @sax: the SAX handler block
12073 * @filename: the filename
12075 * parse an XML external entity out of context and build a tree.
12076 * It use the given SAX function block to handle the parsing callback.
12077 * If sax is NULL, fallback to the default DOM tree building routines.
12079 * [78] extParsedEnt ::= TextDecl? content
12081 * This correspond to a "Well Balanced" chunk
12083 * Returns the resulting document tree
12087 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12089 xmlParserCtxtPtr ctxt;
12091 ctxt = xmlCreateFileParserCtxt(filename);
12092 if (ctxt == NULL) {
12096 if (ctxt->sax != NULL)
12097 xmlFree(ctxt->sax);
12099 ctxt->userData = NULL;
12102 xmlParseExtParsedEnt(ctxt);
12104 if (ctxt->wellFormed)
12108 xmlFreeDoc(ctxt->myDoc);
12109 ctxt->myDoc = NULL;
12113 xmlFreeParserCtxt(ctxt);
12120 * @filename: the filename
12122 * parse an XML external entity out of context and build a tree.
12124 * [78] extParsedEnt ::= TextDecl? content
12126 * This correspond to a "Well Balanced" chunk
12128 * Returns the resulting document tree
12132 xmlParseEntity(const char *filename) {
12133 return(xmlSAXParseEntity(NULL, filename));
12135 #endif /* LIBXML_SAX1_ENABLED */
12138 * xmlCreateEntityParserCtxt:
12139 * @URL: the entity URL
12140 * @ID: the entity PUBLIC ID
12141 * @base: a possible base for the target URI
12143 * Create a parser context for an external entity
12144 * Automatic support for ZLIB/Compress compressed document is provided
12145 * by default if found at compile-time.
12147 * Returns the new parser context or NULL
12150 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12151 const xmlChar *base) {
12152 xmlParserCtxtPtr ctxt;
12153 xmlParserInputPtr inputStream;
12154 char *directory = NULL;
12157 ctxt = xmlNewParserCtxt();
12158 if (ctxt == NULL) {
12162 uri = xmlBuildURI(URL, base);
12165 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12166 if (inputStream == NULL) {
12167 xmlFreeParserCtxt(ctxt);
12171 inputPush(ctxt, inputStream);
12173 if ((ctxt->directory == NULL) && (directory == NULL))
12174 directory = xmlParserGetDirectory((char *)URL);
12175 if ((ctxt->directory == NULL) && (directory != NULL))
12176 ctxt->directory = directory;
12178 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12179 if (inputStream == NULL) {
12181 xmlFreeParserCtxt(ctxt);
12185 inputPush(ctxt, inputStream);
12187 if ((ctxt->directory == NULL) && (directory == NULL))
12188 directory = xmlParserGetDirectory((char *)uri);
12189 if ((ctxt->directory == NULL) && (directory != NULL))
12190 ctxt->directory = directory;
12196 /************************************************************************
12198 * Front ends when parsing from a file *
12200 ************************************************************************/
12203 * xmlCreateURLParserCtxt:
12204 * @filename: the filename or URL
12205 * @options: a combination of xmlParserOption
12207 * Create a parser context for a file or URL content.
12208 * Automatic support for ZLIB/Compress compressed document is provided
12209 * by default if found at compile-time and for file accesses
12211 * Returns the new parser context or NULL
12214 xmlCreateURLParserCtxt(const char *filename, int options)
12216 xmlParserCtxtPtr ctxt;
12217 xmlParserInputPtr inputStream;
12218 char *directory = NULL;
12220 ctxt = xmlNewParserCtxt();
12221 if (ctxt == NULL) {
12222 xmlErrMemory(NULL, "cannot allocate parser context");
12227 xmlCtxtUseOptions(ctxt, options);
12228 ctxt->linenumbers = 1;
12230 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
12231 if (inputStream == NULL) {
12232 xmlFreeParserCtxt(ctxt);
12236 inputPush(ctxt, inputStream);
12237 if ((ctxt->directory == NULL) && (directory == NULL))
12238 directory = xmlParserGetDirectory(filename);
12239 if ((ctxt->directory == NULL) && (directory != NULL))
12240 ctxt->directory = directory;
12246 * xmlCreateFileParserCtxt:
12247 * @filename: the filename
12249 * Create a parser context for a file content.
12250 * Automatic support for ZLIB/Compress compressed document is provided
12251 * by default if found at compile-time.
12253 * Returns the new parser context or NULL
12256 xmlCreateFileParserCtxt(const char *filename)
12258 return(xmlCreateURLParserCtxt(filename, 0));
12261 #ifdef LIBXML_SAX1_ENABLED
12263 * xmlSAXParseFileWithData:
12264 * @sax: the SAX handler block
12265 * @filename: the filename
12266 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12268 * @data: the userdata
12270 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12271 * compressed document is provided by default if found at compile-time.
12272 * It use the given SAX function block to handle the parsing callback.
12273 * If sax is NULL, fallback to the default DOM tree building routines.
12275 * User data (void *) is stored within the parser context in the
12276 * context's _private member, so it is available nearly everywhere in libxml
12278 * Returns the resulting document tree
12282 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12283 int recovery, void *data) {
12285 xmlParserCtxtPtr ctxt;
12286 char *directory = NULL;
12290 ctxt = xmlCreateFileParserCtxt(filename);
12291 if (ctxt == NULL) {
12295 if (ctxt->sax != NULL)
12296 xmlFree(ctxt->sax);
12299 xmlDetectSAX2(ctxt);
12301 ctxt->_private = data;
12304 if ((ctxt->directory == NULL) && (directory == NULL))
12305 directory = xmlParserGetDirectory(filename);
12306 if ((ctxt->directory == NULL) && (directory != NULL))
12307 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12309 ctxt->recovery = recovery;
12311 xmlParseDocument(ctxt);
12313 if ((ctxt->wellFormed) || recovery) {
12316 if (ctxt->input->buf->compressed > 0)
12317 ret->compression = 9;
12319 ret->compression = ctxt->input->buf->compressed;
12324 xmlFreeDoc(ctxt->myDoc);
12325 ctxt->myDoc = NULL;
12329 xmlFreeParserCtxt(ctxt);
12336 * @sax: the SAX handler block
12337 * @filename: the filename
12338 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12341 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12342 * compressed document is provided by default if found at compile-time.
12343 * It use the given SAX function block to handle the parsing callback.
12344 * If sax is NULL, fallback to the default DOM tree building routines.
12346 * Returns the resulting document tree
12350 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12352 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12357 * @cur: a pointer to an array of xmlChar
12359 * parse an XML in-memory document and build a tree.
12360 * In the case the document is not Well Formed, a tree is built anyway
12362 * Returns the resulting document tree
12366 xmlRecoverDoc(xmlChar *cur) {
12367 return(xmlSAXParseDoc(NULL, cur, 1));
12372 * @filename: the filename
12374 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12375 * compressed document is provided by default if found at compile-time.
12377 * Returns the resulting document tree if the file was wellformed,
12382 xmlParseFile(const char *filename) {
12383 return(xmlSAXParseFile(NULL, filename, 0));
12388 * @filename: the filename
12390 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12391 * compressed document is provided by default if found at compile-time.
12392 * In the case the document is not Well Formed, a tree is built anyway
12394 * Returns the resulting document tree
12398 xmlRecoverFile(const char *filename) {
12399 return(xmlSAXParseFile(NULL, filename, 1));
12404 * xmlSetupParserForBuffer:
12405 * @ctxt: an XML parser context
12406 * @buffer: a xmlChar * buffer
12407 * @filename: a file name
12409 * Setup the parser context to parse a new buffer; Clears any prior
12410 * contents from the parser context. The buffer parameter must not be
12411 * NULL, but the filename parameter can be
12414 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12415 const char* filename)
12417 xmlParserInputPtr input;
12419 if ((ctxt == NULL) || (buffer == NULL))
12422 input = xmlNewInputStream(ctxt);
12423 if (input == NULL) {
12424 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
12425 xmlClearParserCtxt(ctxt);
12429 xmlClearParserCtxt(ctxt);
12430 if (filename != NULL)
12431 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
12432 input->base = buffer;
12433 input->cur = buffer;
12434 input->end = &buffer[xmlStrlen(buffer)];
12435 inputPush(ctxt, input);
12439 * xmlSAXUserParseFile:
12440 * @sax: a SAX handler
12441 * @user_data: The user data returned on SAX callbacks
12442 * @filename: a file name
12444 * parse an XML file and call the given SAX handler routines.
12445 * Automatic support for ZLIB/Compress compressed document is provided
12447 * Returns 0 in case of success or a error number otherwise
12450 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12451 const char *filename) {
12453 xmlParserCtxtPtr ctxt;
12455 ctxt = xmlCreateFileParserCtxt(filename);
12456 if (ctxt == NULL) return -1;
12457 #ifdef LIBXML_SAX1_ENABLED
12458 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12459 #endif /* LIBXML_SAX1_ENABLED */
12460 xmlFree(ctxt->sax);
12462 xmlDetectSAX2(ctxt);
12464 if (user_data != NULL)
12465 ctxt->userData = user_data;
12467 xmlParseDocument(ctxt);
12469 if (ctxt->wellFormed)
12472 if (ctxt->errNo != 0)
12479 if (ctxt->myDoc != NULL) {
12480 xmlFreeDoc(ctxt->myDoc);
12481 ctxt->myDoc = NULL;
12483 xmlFreeParserCtxt(ctxt);
12487 #endif /* LIBXML_SAX1_ENABLED */
12489 /************************************************************************
12491 * Front ends when parsing from memory *
12493 ************************************************************************/
12496 * xmlCreateMemoryParserCtxt:
12497 * @buffer: a pointer to a char array
12498 * @size: the size of the array
12500 * Create a parser context for an XML in-memory document.
12502 * Returns the new parser context or NULL
12505 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12506 xmlParserCtxtPtr ctxt;
12507 xmlParserInputPtr input;
12508 xmlParserInputBufferPtr buf;
12510 if (buffer == NULL)
12515 ctxt = xmlNewParserCtxt();
12519 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
12520 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12522 xmlFreeParserCtxt(ctxt);
12526 input = xmlNewInputStream(ctxt);
12527 if (input == NULL) {
12528 xmlFreeParserInputBuffer(buf);
12529 xmlFreeParserCtxt(ctxt);
12533 input->filename = NULL;
12535 input->base = input->buf->buffer->content;
12536 input->cur = input->buf->buffer->content;
12537 input->end = &input->buf->buffer->content[input->buf->buffer->use];
12539 inputPush(ctxt, input);
12543 #ifdef LIBXML_SAX1_ENABLED
12545 * xmlSAXParseMemoryWithData:
12546 * @sax: the SAX handler block
12547 * @buffer: an pointer to a char array
12548 * @size: the size of the array
12549 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12551 * @data: the userdata
12553 * parse an XML in-memory block and use the given SAX function block
12554 * to handle the parsing callback. If sax is NULL, fallback to the default
12555 * DOM tree building routines.
12557 * User data (void *) is stored within the parser context in the
12558 * context's _private member, so it is available nearly everywhere in libxml
12560 * Returns the resulting document tree
12564 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12565 int size, int recovery, void *data) {
12567 xmlParserCtxtPtr ctxt;
12569 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12570 if (ctxt == NULL) return(NULL);
12572 if (ctxt->sax != NULL)
12573 xmlFree(ctxt->sax);
12576 xmlDetectSAX2(ctxt);
12578 ctxt->_private=data;
12581 ctxt->recovery = recovery;
12583 xmlParseDocument(ctxt);
12585 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12588 xmlFreeDoc(ctxt->myDoc);
12589 ctxt->myDoc = NULL;
12593 xmlFreeParserCtxt(ctxt);
12599 * xmlSAXParseMemory:
12600 * @sax: the SAX handler block
12601 * @buffer: an pointer to a char array
12602 * @size: the size of the array
12603 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12606 * parse an XML in-memory block and use the given SAX function block
12607 * to handle the parsing callback. If sax is NULL, fallback to the default
12608 * DOM tree building routines.
12610 * Returns the resulting document tree
12613 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12614 int size, int recovery) {
12615 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12620 * @buffer: an pointer to a char array
12621 * @size: the size of the array
12623 * parse an XML in-memory block and build a tree.
12625 * Returns the resulting document tree
12628 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
12629 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12633 * xmlRecoverMemory:
12634 * @buffer: an pointer to a char array
12635 * @size: the size of the array
12637 * parse an XML in-memory block and build a tree.
12638 * In the case the document is not Well Formed, a tree is built anyway
12640 * Returns the resulting document tree
12643 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
12644 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12648 * xmlSAXUserParseMemory:
12649 * @sax: a SAX handler
12650 * @user_data: The user data returned on SAX callbacks
12651 * @buffer: an in-memory XML document input
12652 * @size: the length of the XML document in bytes
12654 * A better SAX parsing routine.
12655 * parse an XML in-memory buffer and call the given SAX handler routines.
12657 * Returns 0 in case of success or a error number otherwise
12659 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
12660 const char *buffer, int size) {
12662 xmlParserCtxtPtr ctxt;
12663 xmlSAXHandlerPtr oldsax = NULL;
12665 if (sax == NULL) return -1;
12666 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12667 if (ctxt == NULL) return -1;
12668 oldsax = ctxt->sax;
12670 xmlDetectSAX2(ctxt);
12671 if (user_data != NULL)
12672 ctxt->userData = user_data;
12674 xmlParseDocument(ctxt);
12676 if (ctxt->wellFormed)
12679 if (ctxt->errNo != 0)
12684 ctxt->sax = oldsax;
12685 if (ctxt->myDoc != NULL) {
12686 xmlFreeDoc(ctxt->myDoc);
12687 ctxt->myDoc = NULL;
12689 xmlFreeParserCtxt(ctxt);
12693 #endif /* LIBXML_SAX1_ENABLED */
12696 * xmlCreateDocParserCtxt:
12697 * @cur: a pointer to an array of xmlChar
12699 * Creates a parser context for an XML in-memory document.
12701 * Returns the new parser context or NULL
12704 xmlCreateDocParserCtxt(const xmlChar *cur) {
12709 len = xmlStrlen(cur);
12710 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
12713 #ifdef LIBXML_SAX1_ENABLED
12716 * @sax: the SAX handler block
12717 * @cur: a pointer to an array of xmlChar
12718 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12721 * parse an XML in-memory document and build a tree.
12722 * It use the given SAX function block to handle the parsing callback.
12723 * If sax is NULL, fallback to the default DOM tree building routines.
12725 * Returns the resulting document tree
12729 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
12731 xmlParserCtxtPtr ctxt;
12732 xmlSAXHandlerPtr oldsax = NULL;
12734 if (cur == NULL) return(NULL);
12737 ctxt = xmlCreateDocParserCtxt(cur);
12738 if (ctxt == NULL) return(NULL);
12740 oldsax = ctxt->sax;
12742 ctxt->userData = NULL;
12744 xmlDetectSAX2(ctxt);
12746 xmlParseDocument(ctxt);
12747 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12750 xmlFreeDoc(ctxt->myDoc);
12751 ctxt->myDoc = NULL;
12754 ctxt->sax = oldsax;
12755 xmlFreeParserCtxt(ctxt);
12762 * @cur: a pointer to an array of xmlChar
12764 * parse an XML in-memory document and build a tree.
12766 * Returns the resulting document tree
12770 xmlParseDoc(const xmlChar *cur) {
12771 return(xmlSAXParseDoc(NULL, cur, 0));
12773 #endif /* LIBXML_SAX1_ENABLED */
12775 #ifdef LIBXML_LEGACY_ENABLED
12776 /************************************************************************
12778 * Specific function to keep track of entities references *
12779 * and used by the XSLT debugger *
12781 ************************************************************************/
12783 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12786 * xmlAddEntityReference:
12787 * @ent : A valid entity
12788 * @firstNode : A valid first node for children of entity
12789 * @lastNode : A valid last node of children entity
12791 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12794 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12795 xmlNodePtr lastNode)
12797 if (xmlEntityRefFunc != NULL) {
12798 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12804 * xmlSetEntityReferenceFunc:
12805 * @func: A valid function
12807 * Set the function to call call back when a xml reference has been made
12810 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12812 xmlEntityRefFunc = func;
12814 #endif /* LIBXML_LEGACY_ENABLED */
12816 /************************************************************************
12820 ************************************************************************/
12822 #ifdef LIBXML_XPATH_ENABLED
12823 #include <libxml/xpath.h>
/* Default generic error routine; xmlInitParser() compares xmlGenericError with it. */
12826 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser() and back to 0 at the end of xmlCleanupParser(). */
12827 static int xmlParserInitialized = 0;
12832 * Initialization function for the XML parser.
12833 * This is not reentrant. Call once before processing in case of
12834 * use in multithreaded programs.
12838 xmlInitParser(void) {
/*
 * One-time library set-up, guarded by the file-level xmlParserInitialized
 * flag.
 * NOTE(review): the statement controlled by the flag test below is missing
 * from this extract (presumably an early return) - confirm.
 */
12839 if (xmlParserInitialized != 0)
/* Install the default generic error function only when the current one is the default or unset. */
12842 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12843 (xmlGenericError == NULL))
12844 initGenericErrorDefaultFunc(NULL);
/*
 * Set-up calls for encodings, the default SAX handler and the default input
 * callbacks; the output and HTML calls are compiled in only when the
 * corresponding LIBXML_*_ENABLED macro is defined.
 */
12848 xmlInitCharEncodingHandlers();
12849 xmlDefaultSAXHandlerInit();
12850 xmlRegisterDefaultInputCallbacks();
12851 #ifdef LIBXML_OUTPUT_ENABLED
12852 xmlRegisterDefaultOutputCallbacks();
12853 #endif /* LIBXML_OUTPUT_ENABLED */
12854 #ifdef LIBXML_HTML_ENABLED
12855 htmlInitAutoClose();
12856 htmlDefaultSAXHandlerInit();
12858 #ifdef LIBXML_XPATH_ENABLED
/* Record completion so that the flag test at the top is true on later calls. */
12861 xmlParserInitialized = 1;
12865 * xmlCleanupParser:
12867 * Cleanup function for the XML library. It tries to reclaim all
12868 * parsing related global memory allocated for the library processing.
12869 * It doesn't deallocate any document related memory. Calling this
12870 * function should not prevent reusing the library but one should
12871 * call xmlCleanupParser() only when the process has
12872 * finished using the library or XML document built with it.
12876 xmlCleanupParser(void) {
/*
 * Tear down the global state set up by the library, module by module.
 * NOTE(review): the statement controlled by the flag test below is missing
 * from this extract (presumably an early return when nothing was
 * initialized) - confirm.
 */
12877 if (!xmlParserInitialized)
12880 xmlCleanupCharEncodingHandlers();
/* Catalog, output and schema clean-ups are compiled in only when enabled. */
12881 #ifdef LIBXML_CATALOG_ENABLED
12882 xmlCatalogCleanup();
12885 xmlCleanupInputCallbacks();
12886 #ifdef LIBXML_OUTPUT_ENABLED
12887 xmlCleanupOutputCallbacks();
12889 #ifdef LIBXML_SCHEMAS_ENABLED
12890 xmlSchemaCleanupTypes();
12891 xmlRelaxNGCleanupTypes();
12893 xmlCleanupGlobals();
12894 xmlResetLastError();
/*
 * NOTE(review): the trailing comment on the next line says the call must be
 * last, yet xmlCleanupMemory() follows it - confirm the intended order.
 */
12895 xmlCleanupThreads(); /* must be last if called not from the main thread */
12896 xmlCleanupMemory();
/* Clear the flag so that xmlInitParser() performs its set-up again if called. */
12897 xmlParserInitialized = 0;
12900 /************************************************************************
12902 * New set (2.6.0) of simpler and more flexible APIs *
12904 ************************************************************************/
12910 * Free a string if it is not owned by the "dict" dictionary in the
/*
 * DICT_FREE(str): call xmlFree() on @str when it is non-NULL and either the
 * variable `dict` is NULL or xmlDictOwns(dict, str) returns 0.
 * The macro reads a variable named `dict` that must exist at the point of
 * expansion, evaluates @str up to three times, and expands to a complete
 * `if (...) xmlFree(...);` statement, so writing `DICT_FREE(x);` leaves an
 * extra empty statement: do not use it as the unbraced body of an if/else.
 */
12913 #define DICT_FREE(str) \
12914 if ((str) && ((!dict) || \
12915 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
12916 xmlFree((char *)(str));
12920 * @ctxt: an XML parser context
12922 * Reset a parser context
12925 xmlCtxtReset(xmlParserCtxtPtr ctxt)
/*
 * Bring @ctxt back to a start-of-parse state: stacked inputs are popped and
 * freed, per-document strings and any attached tree are released, and the
 * status fields are set back to fixed initial values.
 * NOTE(review): the embedded numbering skips lines throughout this function
 * (for instance the `dict` variable read by DICT_FREE is not declared in the
 * visible lines), so further resets or guards may exist in the complete
 * file - confirm.
 */
12927 xmlParserInputPtr input;
/* Pop and free every input stream still stacked on the context. */
12935 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12936 xmlFreeInputStream(input);
12939 ctxt->input = NULL;
/* Slot 0 of spaceTab gets -1 and becomes the current space entry. */
12942 ctxt->spaceTab[0] = -1;
12943 ctxt->space = &ctxt->spaceTab[0];
/* Per-document strings: freed unless the dictionary owns them, then cleared. */
12952 DICT_FREE(ctxt->version);
12953 ctxt->version = NULL;
12954 DICT_FREE(ctxt->encoding);
12955 ctxt->encoding = NULL;
12956 DICT_FREE(ctxt->directory);
12957 ctxt->directory = NULL;
12958 DICT_FREE(ctxt->extSubURI);
12959 ctxt->extSubURI = NULL;
12960 DICT_FREE(ctxt->extSubSystem);
12961 ctxt->extSubSystem = NULL;
/* A tree still attached to the context is freed here. */
12962 if (ctxt->myDoc != NULL)
12963 xmlFreeDoc(ctxt->myDoc);
12964 ctxt->myDoc = NULL;
12966 ctxt->standalone = -1;
12967 ctxt->hasExternalSubset = 0;
12968 ctxt->hasPErefs = 0;
12970 ctxt->external = 0;
12971 ctxt->instate = XML_PARSER_START;
12974 ctxt->wellFormed = 1;
12975 ctxt->nsWellFormed = 1;
12976 ctxt->disableSAX = 0;
/*
 * Validation context reports through the parser's validity handlers.
 * NOTE(review): two lines are missing from the extract just before these
 * assignments - confirm they are not conditional or compiled out.
 */
12979 ctxt->vctxt.userData = ctxt;
12980 ctxt->vctxt.error = xmlParserValidityError;
12981 ctxt->vctxt.warning = xmlParserValidityWarning;
12983 ctxt->record_info = 0;
12985 ctxt->checkIndex = 0;
12986 ctxt->inSubset = 0;
12987 ctxt->errNo = XML_ERR_OK;
12989 ctxt->charset = XML_CHAR_ENCODING_UTF8;
/*
 * NOTE(review): catalogs is cleared here, but the catalog block further down
 * tests it for non-NULL before calling xmlCatalogFreeLocal(). With only the
 * visible statements that test can never succeed, so a catalog list attached
 * to the context would not be freed - confirm, and consider clearing the
 * field after the free instead.
 */
12990 ctxt->catalogs = NULL;
12991 xmlInitNodeInfoSeq(&ctxt->node_seq);
/* attsDefault entries are released with xmlFree; attsSpecial is freed with no per-entry deallocator. */
12993 if (ctxt->attsDefault != NULL) {
12994 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12995 ctxt->attsDefault = NULL;
12997 if (ctxt->attsSpecial != NULL) {
12998 xmlHashFree(ctxt->attsSpecial, NULL);
12999 ctxt->attsSpecial = NULL;
13002 #ifdef LIBXML_CATALOG_ENABLED
13003 if (ctxt->catalogs != NULL)
13004 xmlCatalogFreeLocal(ctxt->catalogs);
/* Clear the stored last error only when one was recorded. */
13006 if (ctxt->lastError.code != XML_ERR_OK)
13007 xmlResetError(&ctxt->lastError);
13011 * xmlCtxtResetPush:
13012 * @ctxt: an XML parser context
13013 * @chunk: a pointer to an array of chars
13014 * @size: number of chars in the array
13015 * @filename: an optional file name or URI
13016 * @encoding: the document encoding, or NULL
13018 * Reset a push parser context
13020 * Returns 0 in case of success and 1 in case of error
13023 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13024 int size, const char *filename, const char *encoding)
/*
 * Reset @ctxt with xmlCtxtReset(), attach a fresh input stream backed by a
 * newly allocated parser input buffer, push the first @size bytes of @chunk
 * into it and select the encoding.
 * NOTE(review): the return statements and several braces are missing from
 * this extract; the error/success values are therefore not visible here.
 */
13026 xmlParserInputPtr inputStream;
13027 xmlParserInputBufferPtr buf;
13028 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
/* Auto-detect only when the caller gave no encoding and at least 4 bytes are available. */
13033 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13034 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
/* The buffer is allocated before @ctxt is validated; the NULL-context branch below frees it again. */
13036 buf = xmlAllocParserInputBuffer(enc);
13040 if (ctxt == NULL) {
13041 xmlFreeParserInputBuffer(buf);
13045 xmlCtxtReset(ctxt);
/* Allocate pushTab on first use: nameMax * 3 pointer-sized slots. */
13047 if (ctxt->pushTab == NULL) {
13048 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13049 sizeof(xmlChar *));
13050 if (ctxt->pushTab == NULL) {
13051 xmlErrMemory(ctxt, NULL);
13052 xmlFreeParserInputBuffer(buf);
/* The directory comes from @filename when one is supplied, otherwise it is NULL. */
13057 if (filename == NULL) {
13058 ctxt->directory = NULL;
13060 ctxt->directory = xmlParserGetDirectory(filename);
13063 inputStream = xmlNewInputStream(ctxt);
13064 if (inputStream == NULL) {
13065 xmlFreeParserInputBuffer(buf);
13069 if (filename == NULL)
13070 inputStream->filename = NULL;
13072 inputStream->filename = (char *)
13073 xmlCanonicPath((const xmlChar *) filename);
/* Hand the buffer to the stream and point base/cur at the start of its content. */
13074 inputStream->buf = buf;
13075 inputStream->base = inputStream->buf->buffer->content;
13076 inputStream->cur = inputStream->buf->buffer->content;
/* NOTE(review): the left-hand side of the next expression is on a line missing from this extract. */
13078 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13080 inputPush(ctxt, inputStream);
/*
 * Feed the first chunk. base/cur are saved as offsets before the push and
 * rebuilt from buffer->content afterwards, presumably because the push may
 * move the content array - confirm.
 */
13082 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13083 (ctxt->input->buf != NULL)) {
13084 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13085 int cur = ctxt->input->cur - ctxt->input->base;
13087 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13089 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13090 ctxt->input->cur = ctxt->input->base + cur;
13092 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
/* NOTE(review): trace output; any conditional compilation around it is not visible here. */
13095 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
/*
 * An explicit @encoding is looked up by name and reported with
 * XML_ERR_UNSUPPORTED_ENCODING when no handler is found; otherwise a
 * detected encoding other than NONE is applied.
 */
13099 if (encoding != NULL) {
13100 xmlCharEncodingHandlerPtr hdlr;
13102 hdlr = xmlFindCharEncodingHandler(encoding);
13103 if (hdlr != NULL) {
13104 xmlSwitchToEncoding(ctxt, hdlr);
13106 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13107 "Unsupported encoding %s\n", BAD_CAST encoding);
13109 } else if (enc != XML_CHAR_ENCODING_NONE) {
13110 xmlSwitchEncoding(ctxt, enc);
13117 * xmlCtxtUseOptions:
13118 * @ctxt: an XML parser context
13119 * @options: a combination of xmlParserOption
13121 * Applies the options to the parser context
13123 * Returns 0 in case of success, the set of unknown or unimplemented options
13124 * in case of error.
13127 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
/*
 * Translate the xmlParserOption bits in @options into fields of @ctxt and
 * of its SAX handler. Each recognised bit is subtracted from @options once
 * handled, so what is left afterwards is the set of bits that were not
 * recognised.
 * NOTE(review): the `else` lines that separate each "set" assignment from
 * its "clear" counterpart, and the final return, are missing from this
 * extract - read the paired assignments below as if/else branches and
 * confirm against the complete file.
 */
13131 if (options & XML_PARSE_RECOVER) {
13132 ctxt->recovery = 1;
13133 options -= XML_PARSE_RECOVER;
13135 ctxt->recovery = 0;
13136 if (options & XML_PARSE_DTDLOAD) {
13137 ctxt->loadsubset = XML_DETECT_IDS;
13138 options -= XML_PARSE_DTDLOAD;
13140 ctxt->loadsubset = 0;
/* DTDATTR adds to loadsubset rather than replacing the value chosen above. */
13141 if (options & XML_PARSE_DTDATTR) {
13142 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13143 options -= XML_PARSE_DTDATTR;
13145 if (options & XML_PARSE_NOENT) {
13146 ctxt->replaceEntities = 1;
13147 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13148 options -= XML_PARSE_NOENT;
13150 ctxt->replaceEntities = 0;
13151 if (options & XML_PARSE_PEDANTIC) {
13152 ctxt->pedantic = 1;
13153 options -= XML_PARSE_PEDANTIC;
13155 ctxt->pedantic = 0;
/* NOBLANKS also installs the SAX2 ignorable-whitespace callback on the handler. */
13156 if (options & XML_PARSE_NOBLANKS) {
13157 ctxt->keepBlanks = 0;
13158 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13159 options -= XML_PARSE_NOBLANKS;
13161 ctxt->keepBlanks = 1;
/*
 * With validation on, NOWARNING/NOERROR also silence the validation
 * context. They are only tested here; the bits themselves are subtracted
 * in the dedicated blocks further down.
 */
13162 if (options & XML_PARSE_DTDVALID) {
13163 ctxt->validate = 1;
13164 if (options & XML_PARSE_NOWARNING)
13165 ctxt->vctxt.warning = NULL;
13166 if (options & XML_PARSE_NOERROR)
13167 ctxt->vctxt.error = NULL;
13168 options -= XML_PARSE_DTDVALID;
13170 ctxt->validate = 0;
/* Silence parser diagnostics by clearing the SAX callbacks. */
13171 if (options & XML_PARSE_NOWARNING) {
13172 ctxt->sax->warning = NULL;
13173 options -= XML_PARSE_NOWARNING;
13175 if (options & XML_PARSE_NOERROR) {
13176 ctxt->sax->error = NULL;
13177 ctxt->sax->fatalError = NULL;
13178 options -= XML_PARSE_NOERROR;
13180 #ifdef LIBXML_SAX1_ENABLED
/* SAX1 mode: use the non-namespace element callbacks and drop the Ns ones. */
13181 if (options & XML_PARSE_SAX1) {
13182 ctxt->sax->startElement = xmlSAX2StartElement;
13183 ctxt->sax->endElement = xmlSAX2EndElement;
13184 ctxt->sax->startElementNs = NULL;
13185 ctxt->sax->endElementNs = NULL;
13186 ctxt->sax->initialized = 1;
13187 options -= XML_PARSE_SAX1;
13189 #endif /* LIBXML_SAX1_ENABLED */
13190 if (options & XML_PARSE_NODICT) {
13191 ctxt->dictNames = 0;
13192 options -= XML_PARSE_NODICT;
13194 ctxt->dictNames = 1;
13196 if (options & XML_PARSE_NOCDATA) {
13197 ctxt->sax->cdataBlock = NULL;
13198 options -= XML_PARSE_NOCDATA;
/* The next three options are recorded in ctxt->options; no dedicated field is written here. */
13200 if (options & XML_PARSE_NSCLEAN) {
13201 ctxt->options |= XML_PARSE_NSCLEAN;
13202 options -= XML_PARSE_NSCLEAN;
13204 if (options & XML_PARSE_NONET) {
13205 ctxt->options |= XML_PARSE_NONET;
13206 options -= XML_PARSE_NONET;
13208 if (options & XML_PARSE_COMPACT) {
13209 ctxt->options |= XML_PARSE_COMPACT;
13210 options -= XML_PARSE_COMPACT;
/* Line numbers are switched on here regardless of @options. */
13212 ctxt->linenumbers = 1;
13218 * @ctxt: an XML parser context
13219 * @URL: the base URL to use for the document
13220 * @encoding: the document encoding, or NULL
13221 * @options: a combination of xmlParserOption
13222 * @reuse: keep the context for reuse
13224 * Common front-end for the xmlRead functions
13226 * Returns the resulting document tree or NULL
13229 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13230 int options, int reuse)
/*
 * Shared tail of the xmlRead and xmlCtxtRead entry points: apply @options,
 * apply an explicit @encoding if given, record @URL as the input filename
 * when the input has none, then run xmlParseDocument().
 * NOTE(review): the lines that assign and return the result, and whatever
 * ties the final xmlFreeParserCtxt() to @reuse, are missing from this
 * extract - confirm against the complete file.
 */
/* The value returned by xmlCtxtUseOptions() is not examined here. */
13234 xmlCtxtUseOptions(ctxt, options);
13235 if (encoding != NULL) {
13236 xmlCharEncodingHandlerPtr hdlr;
13238 hdlr = xmlFindCharEncodingHandler(encoding);
13240 xmlSwitchToEncoding(ctxt, hdlr);
/* @URL is duplicated into the input only when no filename is already set. */
13242 if ((URL != NULL) && (ctxt->input != NULL) &&
13243 (ctxt->input->filename == NULL))
13244 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
13245 xmlParseDocument(ctxt);
/* Same acceptance rule as xmlSAXParseDoc: well formed, or recovery enabled on the context. */
13246 if ((ctxt->wellFormed) || ctxt->recovery)
/* presumably the rejection path: free the tree that was built - confirm */
13250 if (ctxt->myDoc != NULL) {
13251 xmlFreeDoc(ctxt->myDoc);
/* Detach the tree from the context before the context is released or reused. */
13254 ctxt->myDoc = NULL;
13256 xmlFreeParserCtxt(ctxt);
13264 * @cur: a pointer to a zero terminated string
13265 * @URL: the base URL to use for the document
13266 * @encoding: the document encoding, or NULL
13267 * @options: a combination of xmlParserOption
13269 * parse an XML in-memory document and build a tree.
13271 * Returns the resulting document tree
13274 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
/*
 * Build a one-shot context over the string @cur and delegate to xmlDoRead()
 * with reuse set to 0.
 * NOTE(review): the NULL checks are on lines missing from this extract.
 */
13276 xmlParserCtxtPtr ctxt;
13281 ctxt = xmlCreateDocParserCtxt(cur);
13284 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13289 * @filename: a file or URL
13290 * @encoding: the document encoding, or NULL
13291 * @options: a combination of xmlParserOption
13293 * parse an XML file from the filesystem or the network.
13295 * Returns the resulting document tree
13298 xmlReadFile(const char *filename, const char *encoding, int options)
/*
 * Build a one-shot context with xmlCreateURLParserCtxt(), which also
 * receives @options, and delegate to xmlDoRead() with reuse set to 0.
 * No URL is passed on (NULL), so xmlDoRead() does not set the input
 * filename itself.
 */
13300 xmlParserCtxtPtr ctxt;
13302 ctxt = xmlCreateURLParserCtxt(filename, options);
13305 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
13310 * @buffer: a pointer to a char array
13311 * @size: the size of the array
13312 * @URL: the base URL to use for the document
13313 * @encoding: the document encoding, or NULL
13314 * @options: a combination of xmlParserOption
13316 * parse an XML in-memory document and build a tree.
13318 * Returns the resulting document tree
13321 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
/*
 * Build a one-shot context over the @size bytes at @buffer and delegate to
 * xmlDoRead() with reuse set to 0.
 */
13323 xmlParserCtxtPtr ctxt;
13325 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13328 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13333 * @fd: an open file descriptor
13334 * @URL: the base URL to use for the document
13335 * @encoding: the document encoding, or NULL
13336 * @options: a combination of xmlParserOption
13338 * parse an XML document from a file descriptor and build a tree.
13339 * NOTE that the file descriptor will not be closed when the
13340 * reader is closed or reset.
13342 * Returns the resulting document tree
13345 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
/*
 * Wrap @fd in a parser input buffer, attach it to a new context as an I/O
 * input stream and delegate to xmlDoRead() with reuse set to 0.
 * NOTE(review): the validity checks and NULL returns are on lines missing
 * from this extract.
 */
13347 xmlParserCtxtPtr ctxt;
13348 xmlParserInputBufferPtr input;
13349 xmlParserInputPtr stream;
13354 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
/* No close callback: presumably what keeps the descriptor open when the buffer is freed - confirm. */
13357 input->closecallback = NULL;
13358 ctxt = xmlNewParserCtxt();
13359 if (ctxt == NULL) {
13360 xmlFreeParserInputBuffer(input);
13363 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/* On failure both the buffer and the context are released here. */
13364 if (stream == NULL) {
13365 xmlFreeParserInputBuffer(input);
13366 xmlFreeParserCtxt(ctxt);
13369 inputPush(ctxt, stream);
13370 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13375 * @ioread: an I/O read function
13376 * @ioclose: an I/O close function
13377 * @ioctx: an I/O handler
13378 * @URL: the base URL to use for the document
13379 * @encoding: the document encoding, or NULL
13380 * @options: a combination of xmlParserOption
13382 * parse an XML document from I/O functions and source and build a tree.
13384 * Returns the resulting document tree
13387 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13388 void *ioctx, const char *URL, const char *encoding, int options)
/*
 * Wrap the caller's read/close callbacks and @ioctx in a parser input
 * buffer, attach it to a new context as an I/O input stream and delegate
 * to xmlDoRead() with reuse set to 0.
 * NOTE(review): the statements controlled by the checks below (presumably
 * NULL returns) are on lines missing from this extract.
 */
13390 xmlParserCtxtPtr ctxt;
13391 xmlParserInputBufferPtr input;
13392 xmlParserInputPtr stream;
/* A read callback is mandatory; @ioclose is not checked in the visible lines. */
13394 if (ioread == NULL)
13397 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13398 XML_CHAR_ENCODING_NONE);
13401 ctxt = xmlNewParserCtxt();
13402 if (ctxt == NULL) {
13403 xmlFreeParserInputBuffer(input);
13406 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/* On failure both the buffer and the context are released here. */
13407 if (stream == NULL) {
13408 xmlFreeParserInputBuffer(input);
13409 xmlFreeParserCtxt(ctxt);
13412 inputPush(ctxt, stream);
13413 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13418 * @ctxt: an XML parser context
13419 * @cur: a pointer to a zero terminated string
13420 * @URL: the base URL to use for the document
13421 * @encoding: the document encoding, or NULL
13422 * @options: a combination of xmlParserOption
13424 * parse an XML in-memory document and build a tree.
13425 * This reuses the existing @ctxt parser context
13427 * Returns the resulting document tree
13430 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
13431 const char *URL, const char *encoding, int options)
/*
 * Reuse @ctxt: reset it, attach a string input stream over @cur and
 * delegate to xmlDoRead() with reuse set to 1.
 * NOTE(review): argument checks and the failure return are on lines missing
 * from this extract.
 */
13433 xmlParserInputPtr stream;
/* The reset frees any inputs and tree still held by the context. */
13440 xmlCtxtReset(ctxt);
13442 stream = xmlNewStringInputStream(ctxt, cur);
13443 if (stream == NULL) {
13446 inputPush(ctxt, stream);
13447 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13452 * @ctxt: an XML parser context
13453 * @filename: a file or URL
13454 * @encoding: the document encoding, or NULL
13455 * @options: a combination of xmlParserOption
13457 * parse an XML file from the filesystem or the network.
13458 * This reuses the existing @ctxt parser context
13460 * Returns the resulting document tree
13463 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13464 const char *encoding, int options)
/*
 * Reuse @ctxt: reset it, obtain the input for @filename through
 * xmlLoadExternalEntity() and delegate to xmlDoRead() with reuse set to 1
 * and no URL.
 * NOTE(review): the statements controlled by the checks below are on lines
 * missing from this extract.
 */
13466 xmlParserInputPtr stream;
13468 if (filename == NULL)
13473 xmlCtxtReset(ctxt);
13475 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
13476 if (stream == NULL) {
13479 inputPush(ctxt, stream);
13480 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
13484 * xmlCtxtReadMemory:
13485 * @ctxt: an XML parser context
13486 * @buffer: a pointer to a char array
13487 * @size: the size of the array
13488 * @URL: the base URL to use for the document
13489 * @encoding: the document encoding, or NULL
13490 * @options: a combination of xmlParserOption
13492 * parse an XML in-memory document and build a tree.
13493 * This reuses the existing @ctxt parser context
13495 * Returns the resulting document tree
13498 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13499 const char *URL, const char *encoding, int options)
/*
 * Reuse @ctxt: reset it, wrap the @size bytes at @buffer in a memory input
 * buffer, attach that as an I/O input stream and delegate to xmlDoRead()
 * with reuse set to 1.
 * NOTE(review): the statements controlled by the checks below are on lines
 * missing from this extract.
 */
13501 xmlParserInputBufferPtr input;
13502 xmlParserInputPtr stream;
13506 if (buffer == NULL)
13509 xmlCtxtReset(ctxt);
13511 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13512 if (input == NULL) {
13516 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/* The buffer is released here when no stream could be created for it. */
13517 if (stream == NULL) {
13518 xmlFreeParserInputBuffer(input);
13522 inputPush(ctxt, stream);
13523 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13528 * @ctxt: an XML parser context
13529 * @fd: an open file descriptor
13530 * @URL: the base URL to use for the document
13531 * @encoding: the document encoding, or NULL
13532 * @options: a combination of xmlParserOption
13534 * parse an XML document from a file descriptor and build a tree.
13535 * This reuses the existing @ctxt parser context
13536 * NOTE that the file descriptor will not be closed when the
13537 * reader is closed or reset.
13539 * Returns the resulting document tree
13542 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13543 const char *URL, const char *encoding, int options)
/*
 * Reuse @ctxt: reset it, wrap @fd in a parser input buffer, attach that as
 * an I/O input stream and delegate to xmlDoRead() with reuse set to 1.
 * NOTE(review): argument checks and failure returns are on lines missing
 * from this extract.
 */
13545 xmlParserInputBufferPtr input;
13546 xmlParserInputPtr stream;
13553 xmlCtxtReset(ctxt);
13556 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
/* No close callback: presumably what keeps the descriptor open when the buffer is freed - confirm. */
13559 input->closecallback = NULL;
13560 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/* The buffer is released here when no stream could be created for it. */
13561 if (stream == NULL) {
13562 xmlFreeParserInputBuffer(input);
13565 inputPush(ctxt, stream);
13566 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13571 * @ctxt: an XML parser context
13572 * @ioread: an I/O read function
13573 * @ioclose: an I/O close function
13574 * @ioctx: an I/O handler
13575 * @URL: the base URL to use for the document
13576 * @encoding: the document encoding, or NULL
13577 * @options: a combination of xmlParserOption
13579 * parse an XML document from I/O functions and source and build a tree.
13580 * This reuses the existing @ctxt parser context
13582 * Returns the resulting document tree
13585 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13586 xmlInputCloseCallback ioclose, void *ioctx,
13588 const char *encoding, int options)
/*
 * Reuse @ctxt: reset it, wrap the caller's read/close callbacks and @ioctx
 * in a parser input buffer, attach that as an I/O input stream and delegate
 * to xmlDoRead() with reuse set to 1.
 * NOTE(review): one parameter line (the one declaring URL, which is passed
 * to xmlDoRead below) and the failure returns are missing from this
 * extract.
 */
13590 xmlParserInputBufferPtr input;
13591 xmlParserInputPtr stream;
/* A read callback is mandatory; @ioclose is not checked in the visible lines. */
13593 if (ioread == NULL)
13598 xmlCtxtReset(ctxt);
13600 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13601 XML_CHAR_ENCODING_NONE);
13604 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
/* The buffer is released here when no stream could be created for it. */
13605 if (stream == NULL) {
13606 xmlFreeParserInputBuffer(input);
13609 inputPush(ctxt, stream);
13610 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13613 #define bottom_parser
13614 #include "elfgcchack.h"