* parserInternals.c to reduce this file size.
* As much as possible the functions are associated with their relative
* production in the XML specification. A few productions defining the
- * different ranges of character are actually implanted either in
+ * different ranges of character are actually implemented either in
* parserInternals.h or parserInternals.c
* The DOM tree build is realized from the default SAX callbacks in
* the module SAX.c.
#endif
#include <stdlib.h>
+#include <limits.h>
#include <string.h>
#include <stdarg.h>
#include <libxml/xmlmemory.h>
#include <lzma.h>
#endif
+#include "buf.h"
+#include "enc.h"
+
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
* parser option.
*/
static int
-xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
- xmlEntityPtr ent)
+xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
+ xmlEntityPtr ent, size_t replacement)
{
- unsigned long consumed = 0;
+ size_t consumed = 0;
if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
return (0);
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
return (1);
- if (size != 0) {
+ if (replacement != 0) {
+ if (replacement < XML_MAX_TEXT_LENGTH)
+ return(0);
+
+ /*
+ * If the volume of entity copy reaches 10 times the
+ * amount of parsed data and over the large text threshold
+ * then that's very likely to be an abuse.
+ */
+ if (ctxt->input != NULL) {
+ consumed = ctxt->input->consumed +
+ (ctxt->input->cur - ctxt->input->base);
+ }
+ consumed += ctxt->sizeentities;
+
+ if (replacement < XML_PARSER_NON_LINEAR * consumed)
+ return(0);
+ } else if (size != 0) {
/*
* Do the check based on the replacement size of the entity
*/
/*
* use the number of parsed entities in the replacement
*/
- size = ent->checked;
+ size = ent->checked / 2;
/*
* The amount of data parsed counting entities size only once
*/
return (0);
}
-
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
return (1);
}
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
+/**
+ * XML_PARSER_CHUNK_SIZE
+ *
+ * When calling GROW, this is the minimal amount of data
+ * the parser expects to have received. It is not a hard
+ * limit but an optimization when reading strings like Names.
+ * It is not strictly needed as long as the input's available characters
+ * are followed by 0, which should be provided by the I/O level.
+ */
+#define XML_PARSER_CHUNK_SIZE 100
+
/*
* List of XML prefixed PI allowed by W3C specs
*/
/************************************************************************
* *
- * Some factorized error routines *
+ * Some factorized error routines *
* *
************************************************************************/
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
{
const char *errmsg;
+ char errstr[129] = "";
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
switch (error) {
case XML_ERR_INVALID_HEX_CHARREF:
- errmsg = "CharRef: invalid hexadecimal value\n";
+ errmsg = "CharRef: invalid hexadecimal value";
break;
case XML_ERR_INVALID_DEC_CHARREF:
- errmsg = "CharRef: invalid decimal value\n";
+ errmsg = "CharRef: invalid decimal value";
break;
case XML_ERR_INVALID_CHARREF:
- errmsg = "CharRef: invalid value\n";
+ errmsg = "CharRef: invalid value";
break;
case XML_ERR_INTERNAL_ERROR:
errmsg = "internal error";
break;
case XML_ERR_PEREF_AT_EOF:
- errmsg = "PEReference at end of document\n";
+ errmsg = "PEReference at end of document";
break;
case XML_ERR_PEREF_IN_PROLOG:
- errmsg = "PEReference in prolog\n";
+ errmsg = "PEReference in prolog";
break;
case XML_ERR_PEREF_IN_EPILOG:
- errmsg = "PEReference in epilog\n";
+ errmsg = "PEReference in epilog";
break;
case XML_ERR_PEREF_NO_NAME:
- errmsg = "PEReference: no name\n";
+ errmsg = "PEReference: no name";
break;
case XML_ERR_PEREF_SEMICOL_MISSING:
- errmsg = "PEReference: expecting ';'\n";
+ errmsg = "PEReference: expecting ';'";
break;
case XML_ERR_ENTITY_LOOP:
- errmsg = "Detected an entity reference loop\n";
+ errmsg = "Detected an entity reference loop";
break;
case XML_ERR_ENTITY_NOT_STARTED:
- errmsg = "EntityValue: \" or ' expected\n";
+ errmsg = "EntityValue: \" or ' expected";
break;
case XML_ERR_ENTITY_PE_INTERNAL:
- errmsg = "PEReferences forbidden in internal subset\n";
+ errmsg = "PEReferences forbidden in internal subset";
break;
case XML_ERR_ENTITY_NOT_FINISHED:
- errmsg = "EntityValue: \" or ' expected\n";
+ errmsg = "EntityValue: \" or ' expected";
break;
case XML_ERR_ATTRIBUTE_NOT_STARTED:
- errmsg = "AttValue: \" or ' expected\n";
+ errmsg = "AttValue: \" or ' expected";
break;
case XML_ERR_LT_IN_ATTRIBUTE:
- errmsg = "Unescaped '<' not allowed in attributes values\n";
+ errmsg = "Unescaped '<' not allowed in attributes values";
break;
case XML_ERR_LITERAL_NOT_STARTED:
- errmsg = "SystemLiteral \" or ' expected\n";
+ errmsg = "SystemLiteral \" or ' expected";
break;
case XML_ERR_LITERAL_NOT_FINISHED:
- errmsg = "Unfinished System or Public ID \" or ' expected\n";
+ errmsg = "Unfinished System or Public ID \" or ' expected";
break;
case XML_ERR_MISPLACED_CDATA_END:
- errmsg = "Sequence ']]>' not allowed in content\n";
+ errmsg = "Sequence ']]>' not allowed in content";
break;
case XML_ERR_URI_REQUIRED:
- errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
+ errmsg = "SYSTEM or PUBLIC, the URI is missing";
break;
case XML_ERR_PUBID_REQUIRED:
- errmsg = "PUBLIC, the Public Identifier is missing\n";
+ errmsg = "PUBLIC, the Public Identifier is missing";
break;
case XML_ERR_HYPHEN_IN_COMMENT:
- errmsg = "Comment must not contain '--' (double-hyphen)\n";
+ errmsg = "Comment must not contain '--' (double-hyphen)";
break;
case XML_ERR_PI_NOT_STARTED:
- errmsg = "xmlParsePI : no target name\n";
+ errmsg = "xmlParsePI : no target name";
break;
case XML_ERR_RESERVED_XML_NAME:
- errmsg = "Invalid PI name\n";
+ errmsg = "Invalid PI name";
break;
case XML_ERR_NOTATION_NOT_STARTED:
- errmsg = "NOTATION: Name expected here\n";
+ errmsg = "NOTATION: Name expected here";
break;
case XML_ERR_NOTATION_NOT_FINISHED:
- errmsg = "'>' required to close NOTATION declaration\n";
+ errmsg = "'>' required to close NOTATION declaration";
break;
case XML_ERR_VALUE_REQUIRED:
- errmsg = "Entity value required\n";
+ errmsg = "Entity value required";
break;
case XML_ERR_URI_FRAGMENT:
errmsg = "Fragment not allowed";
break;
case XML_ERR_ATTLIST_NOT_STARTED:
- errmsg = "'(' required to start ATTLIST enumeration\n";
+ errmsg = "'(' required to start ATTLIST enumeration";
break;
case XML_ERR_NMTOKEN_REQUIRED:
- errmsg = "NmToken expected in ATTLIST enumeration\n";
+ errmsg = "NmToken expected in ATTLIST enumeration";
break;
case XML_ERR_ATTLIST_NOT_FINISHED:
- errmsg = "')' required to finish ATTLIST enumeration\n";
+ errmsg = "')' required to finish ATTLIST enumeration";
break;
case XML_ERR_MIXED_NOT_STARTED:
- errmsg = "MixedContentDecl : '|' or ')*' expected\n";
+ errmsg = "MixedContentDecl : '|' or ')*' expected";
break;
case XML_ERR_PCDATA_REQUIRED:
- errmsg = "MixedContentDecl : '#PCDATA' expected\n";
+ errmsg = "MixedContentDecl : '#PCDATA' expected";
break;
case XML_ERR_ELEMCONTENT_NOT_STARTED:
- errmsg = "ContentDecl : Name or '(' expected\n";
+ errmsg = "ContentDecl : Name or '(' expected";
break;
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
- errmsg = "ContentDecl : ',' '|' or ')' expected\n";
+ errmsg = "ContentDecl : ',' '|' or ')' expected";
break;
case XML_ERR_PEREF_IN_INT_SUBSET:
errmsg =
- "PEReference: forbidden within markup decl in internal subset\n";
+ "PEReference: forbidden within markup decl in internal subset";
break;
case XML_ERR_GT_REQUIRED:
- errmsg = "expected '>'\n";
+ errmsg = "expected '>'";
break;
case XML_ERR_CONDSEC_INVALID:
- errmsg = "XML conditional section '[' expected\n";
+ errmsg = "XML conditional section '[' expected";
break;
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
- errmsg = "Content error in the external subset\n";
+ errmsg = "Content error in the external subset";
break;
case XML_ERR_CONDSEC_INVALID_KEYWORD:
errmsg =
- "conditional section INCLUDE or IGNORE keyword expected\n";
+ "conditional section INCLUDE or IGNORE keyword expected";
break;
case XML_ERR_CONDSEC_NOT_FINISHED:
- errmsg = "XML conditional section not closed\n";
+ errmsg = "XML conditional section not closed";
break;
case XML_ERR_XMLDECL_NOT_STARTED:
- errmsg = "Text declaration '<?xml' required\n";
+ errmsg = "Text declaration '<?xml' required";
break;
case XML_ERR_XMLDECL_NOT_FINISHED:
- errmsg = "parsing XML declaration: '?>' expected\n";
+ errmsg = "parsing XML declaration: '?>' expected";
break;
case XML_ERR_EXT_ENTITY_STANDALONE:
- errmsg = "external parsed entities cannot be standalone\n";
+ errmsg = "external parsed entities cannot be standalone";
break;
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
- errmsg = "EntityRef: expecting ';'\n";
+ errmsg = "EntityRef: expecting ';'";
break;
case XML_ERR_DOCTYPE_NOT_FINISHED:
- errmsg = "DOCTYPE improperly terminated\n";
+ errmsg = "DOCTYPE improperly terminated";
break;
case XML_ERR_LTSLASH_REQUIRED:
- errmsg = "EndTag: '</' not found\n";
+ errmsg = "EndTag: '</' not found";
break;
case XML_ERR_EQUAL_REQUIRED:
- errmsg = "expected '='\n";
+ errmsg = "expected '='";
break;
case XML_ERR_STRING_NOT_CLOSED:
- errmsg = "String not closed expecting \" or '\n";
+ errmsg = "String not closed expecting \" or '";
break;
case XML_ERR_STRING_NOT_STARTED:
- errmsg = "String not started expecting ' or \"\n";
+ errmsg = "String not started expecting ' or \"";
break;
case XML_ERR_ENCODING_NAME:
- errmsg = "Invalid XML encoding name\n";
+ errmsg = "Invalid XML encoding name";
break;
case XML_ERR_STANDALONE_VALUE:
- errmsg = "standalone accepts only 'yes' or 'no'\n";
+ errmsg = "standalone accepts only 'yes' or 'no'";
break;
case XML_ERR_DOCUMENT_EMPTY:
- errmsg = "Document is empty\n";
+ errmsg = "Document is empty";
break;
case XML_ERR_DOCUMENT_END:
- errmsg = "Extra content at the end of the document\n";
+ errmsg = "Extra content at the end of the document";
break;
case XML_ERR_NOT_WELL_BALANCED:
- errmsg = "chunk is not well balanced\n";
+ errmsg = "chunk is not well balanced";
break;
case XML_ERR_EXTRA_CONTENT:
- errmsg = "extra content at the end of well balanced chunk\n";
+ errmsg = "extra content at the end of well balanced chunk";
break;
case XML_ERR_VERSION_MISSING:
- errmsg = "Malformed declaration expecting version\n";
+ errmsg = "Malformed declaration expecting version";
+ break;
+ case XML_ERR_NAME_TOO_LONG:
+ errmsg = "Name too long use XML_PARSE_HUGE option";
break;
#if 0
case:
- errmsg = "\n";
+ errmsg = "";
break;
#endif
default:
- errmsg = "Unregistered error message\n";
+ errmsg = "Unregistered error message";
}
+ if (info == NULL)
+ snprintf(errstr, 128, "%s\n", errmsg);
+ else
+ snprintf(errstr, 128, "%s: %%s\n", errmsg);
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
- XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
+ XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
info);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
*/
static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
- const char *msg, const xmlChar *str1, int val,
+ const char *msg, const xmlChar *str1, int val,
const xmlChar *str2)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
/************************************************************************
* *
- * Library wide options *
+ * Library wide options *
* *
************************************************************************/
/************************************************************************
* *
- * SAX2 defaulted attributes handling *
+ * SAX2 defaulted attributes handling *
* *
************************************************************************/
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
- if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
- (ctxt->str_xml_ns == NULL)) {
+ if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
+ (ctxt->str_xml_ns == NULL)) {
xmlErrMemory(ctxt, NULL);
}
}
{
if (ctxt->options & XML_PARSE_NSCLEAN) {
int i;
- for (i = 0;i < ctxt->nsNr;i += 2) {
+ for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
if (ctxt->nsTab[i] == prefix) {
/* in scope */
if (ctxt->nsTab[i + 1] == URL)
* to compare on ASCII based substring.
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
* strings without newlines within the parser.
- * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
+ * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
* defined char within the parser.
* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
*
#define SKIPL(val) do { \
int skipl; \
for(skipl=0; skipl<val; skipl++) { \
- if (*(ctxt->input->cur) == '\n') { \
+ if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
- } else ctxt->input->col++; \
- ctxt->nbChars++; \
+ } else ctxt->input->col++; \
+ ctxt->nbChars++; \
ctxt->input->cur++; \
} \
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
xmlGROW (ctxt);
static void xmlGROW (xmlParserCtxtPtr ctxt) {
+ if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
+ ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
+ ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
+ ctxt->instate = XML_PARSER_EOF;
+ }
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
}
ret = inputPush(ctxt, input);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
GROW;
return(ret);
}
*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
- * production for Char.
+ * production for Char.
*
* Returns the value parsed (as an int), 0 in case of error
*/
if (count++ > 20) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(0);
}
- if ((RAW >= '0') && (RAW <= '9'))
+ if ((RAW >= '0') && (RAW <= '9'))
val = val * 16 + (CUR - '0');
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
val = val * 16 + (CUR - 'a') + 10;
if (count++ > 20) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(0);
}
- if ((RAW >= '0') && (RAW <= '9'))
+ if ((RAW >= '0') && (RAW <= '9'))
val = val * 10 + (CUR - '0');
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
/*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
- * production for Char.
+ * production for Char.
*/
if ((IS_CHAR(val) && (outofrange == 0))) {
return(val);
*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
- * production for Char.
+ * production for Char.
*
* Returns the value parsed (as an int), 0 in case of error, str will be
* updated to the current value of the index
ptr += 3;
cur = *ptr;
while (cur != ';') { /* Non input consuming loop */
- if ((cur >= '0') && (cur <= '9'))
+ if ((cur >= '0') && (cur <= '9'))
val = val * 16 + (cur - '0');
else if ((cur >= 'a') && (cur <= 'f'))
val = val * 16 + (cur - 'a') + 10;
ptr += 2;
cur = *ptr;
while (cur != ';') { /* Non input consuming loops */
- if ((cur >= '0') && (cur <= '9'))
+ if ((cur >= '0') && (cur <= '9'))
val = val * 10 + (cur - '0');
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
/*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
- * production for Char.
+ * production for Char.
*/
if ((IS_CHAR(val) && (outofrange == 0))) {
return(val);
*
* Returns the new input stream or NULL
*/
-
+
static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
-
+
static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
xmlParserInputPtr input;
if (buffer == NULL) {
xmlErrMemory(ctxt, NULL);
xmlFree(input);
- return(NULL);
+ return(NULL);
}
buffer [0] = ' ';
buffer [1] = '%';
/**
* xmlParserHandlePEReference:
* @ctxt: the parser context
- *
+ *
* [69] PEReference ::= '%' Name ';'
*
* [ WFC: No Recursion ]
* A parsed entity must not contain a recursive
- * reference to itself, either directly or indirectly.
+ * reference to itself, either directly or indirectly.
*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an internal DTD
* NOTE: misleading but this is handled.
*
* A PEReference may have been detected in the current input stream
- * the handling is done accordingly to
+ * the handling is done accordingly to
* http://www.w3.org/TR/REC-xml#entproc
- * i.e.
+ * i.e.
* - Included in literal in entity values
* - Included as Parameter Entity reference within DTDs
*/
NEXT;
if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
if (entity == NULL) {
-
+
/*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
name, NULL);
- } else
+ } else
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
name, NULL);
if (xmlPushInput(ctxt, input) < 0)
return;
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
* the amount of data in the buffer.
*/
GROW
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
if ((ctxt->input->end - ctxt->input->cur)>=4) {
start[0] = RAW;
start[1] = NXT(1);
/*
* Macro used to grow the current buffer.
+ * buffer##_size is expected to be a size_t; the enclosing function must
+ * provide a mem_error label, reached on size overflow or allocation failure
*/
#define growBuffer(buffer, n) { \
xmlChar *tmp; \
- buffer##_size *= 2; \
- buffer##_size += n; \
- tmp = (xmlChar *) \
- xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
+ size_t new_size = buffer##_size * 2 + n; \
+ if (new_size < buffer##_size) goto mem_error; \
+ tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
if (tmp == NULL) goto mem_error; \
buffer = tmp; \
+ buffer##_size = new_size; \
}
/**
* @end: an end marker xmlChar, 0 if none
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
- *
+ *
* Takes a entity string content and process to do the adequate substitutions.
*
* [67] Reference ::= EntityRef | CharRef
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
int what, xmlChar end, xmlChar end2, xmlChar end3) {
xmlChar *buffer = NULL;
- int buffer_size = 0;
+ size_t buffer_size = 0;
+ size_t nbchars = 0;
xmlChar *current = NULL;
xmlChar *rep = NULL;
const xmlChar *last;
xmlEntityPtr ent;
int c,l;
- int nbchars = 0;
if ((ctxt == NULL) || (str == NULL) || (len < 0))
return(NULL);
* allocate a translation buffer.
*/
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
- buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
+ buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
if (buffer == NULL) goto mem_error;
/*
if (val != 0) {
COPY_BUF(0,buffer,nbchars,val);
}
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
(ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
goto int_error;
if (ent != NULL)
- ctxt->nbentities += ent->checked;
+ ctxt->nbentities += ent->checked / 2;
if ((ent != NULL) &&
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
if (ent->content != NULL) {
COPY_BUF(0,buffer,nbchars,ent->content[0]);
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
} else {
current = rep;
while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
- if (nbchars >
- buffer_size - XML_PARSER_BUFFER_SIZE) {
- if (xmlParserEntityCheck(ctxt, nbchars, ent))
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
+ if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
goto int_error;
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
const xmlChar *cur = ent->name;
buffer[nbchars++] = '&';
- if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
+ if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
}
for (;i > 0;i--)
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
goto int_error;
if (ent != NULL)
- ctxt->nbentities += ent->checked;
+ ctxt->nbentities += ent->checked / 2;
if (ent != NULL) {
if (ent->content == NULL) {
xmlLoadEntityContent(ctxt, ent);
current = rep;
while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
- if (nbchars >
- buffer_size - XML_PARSER_BUFFER_SIZE) {
- if (xmlParserEntityCheck(ctxt, nbchars, ent))
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
+ if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
goto int_error;
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
} else {
COPY_BUF(l,buffer,nbchars,c);
str += l;
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
}
if (str < last)
* @end: an end marker xmlChar, 0 if none
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
- *
+ *
* Takes a entity string content and process to do the adequate substitutions.
*
* [67] Reference ::= EntityRef | CharRef
} else {
if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
(c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
+ (c == '_') || (c == ':') ||
(IS_COMBINING(c)) ||
(IS_EXTENDER(c)))
return(1);
* Handler for more complex cases
*/
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
c = CUR_CHAR(l);
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
/*
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
((c >= 0x10000) && (c <= 0xEFFFF))
)) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
}
len += l;
NEXTL(l);
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
(c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
+ (c == '_') || (c == ':') ||
(IS_COMBINING(c)) ||
(IS_EXTENDER(c)))) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
}
len += l;
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ c = CUR_CHAR(l);
+ }
}
}
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
+ return(NULL);
+ }
if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
in++;
if ((*in > 0) && (*in < 0x80)) {
count = in - ctxt->input->cur;
+ if ((count > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
+ return(NULL);
+ }
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
ctxt->input->cur = in;
ctxt->nbChars += count;
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
(xmlIsNameChar(ctxt, c) && (c != ':'))) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ return(NULL);
+ }
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
}
len += l;
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ c = CUR_CHAR(l);
+ }
+ }
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ return(NULL);
}
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}
/**
* xmlParseNCName:
* @ctxt: an XML parser context
- * @len: lenght of the string parsed
+ * @len: length of the string parsed
*
* parse an XML name.
*
in++;
if ((*in > 0) && (*in < 0x80)) {
count = in - ctxt->input->cur;
+ if ((count > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ return(NULL);
+ }
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
ctxt->input->cur = in;
ctxt->nbChars += count;
const xmlChar *ret;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
in = ctxt->input->cur;
while (*in != 0 && *in == *cmp) {
*
* [6] Names ::= Name (#x20 Name)*
*
- * Returns the Name parsed or NULL. The @str pointer
+ * Returns the Name parsed or NULL. The @str pointer
* is updated to the current location in the string.
*/
while (xmlIsNameChar(ctxt, c)) {
if (len + 10 > max) {
xmlChar *tmp;
+
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ xmlFree(buffer);
+ return(NULL);
+ }
max *= 2;
tmp = (xmlChar *) xmlRealloc(buffer,
max * sizeof(xmlChar));
return(buffer);
}
}
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ return(NULL);
+ }
*str = cur;
return(xmlStrndup(buf, len));
}
#endif
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
c = CUR_CHAR(l);
while (xmlIsNameChar(ctxt, c)) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
}
COPY_BUF(l,buf,len,c);
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ c = CUR_CHAR(l);
+ }
if (len >= XML_MAX_NAMELEN) {
/*
* Okay someone managed to make a huge token, so he's ready to pay
}
memcpy(buffer, buf, len);
while (xmlIsNameChar(ctxt, c)) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buffer);
+ return(NULL);
+ }
}
if (len + 10 > max) {
xmlChar *tmp;
+ if ((max > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
+ xmlFree(buffer);
+ return(NULL);
+ }
max *= 2;
tmp = (xmlChar *) xmlRealloc(buffer,
max * sizeof(xmlChar));
}
if (len == 0)
return(NULL);
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
+ return(NULL);
+ }
return(xmlStrndup(buf, len));
}
ctxt->instate = XML_PARSER_ENTITY_VALUE;
input = ctxt->input;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
NEXT;
c = CUR_CHAR(l);
/*
* When a parameter entity reference appears in a literal entity
* value, ... a single or double quote character in the replacement
* text is always treated as a normal data character and will not
- * terminate the literal.
+ * terminate the literal.
* In practice it means we stop the loop only when back at parsing
* the initial entity and the quote is found
*/
- while ((IS_CHAR(c)) && ((c != stop) || /* checked */
- (ctxt->input != input))) {
+ while (((IS_CHAR(c)) && ((c != stop) || /* checked */
+ (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
if (len + 5 >= size) {
xmlChar *tmp;
}
}
buf[len] = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
/*
* Raise problem w.r.t. '&' and '%' being used in non-entities
*/
ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
0, 0, 0);
- if (orig != NULL)
+ if (orig != NULL)
*orig = buf;
else
xmlFree(buf);
}
-
+
return(ret);
}
xmlChar limit = 0;
xmlChar *buf = NULL;
xmlChar *rep = NULL;
- int len = 0;
- int buf_size = 0;
+ size_t len = 0;
+ size_t buf_size = 0;
int c, l, in_space = 0;
xmlChar *current = NULL;
xmlEntityPtr ent;
* allocate a translation buffer.
*/
buf_size = XML_PARSER_BUFFER_SIZE;
- buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
+ buf = (xmlChar *) xmlMallocAtomic(buf_size);
if (buf == NULL) goto mem_error;
/*
* OK loop until we reach one of the ending char or a size limit.
*/
c = CUR_CHAR(l);
- while ((NXT(0) != limit) && /* checked */
- (IS_CHAR(c)) && (c != '<')) {
+ while (((NXT(0) != limit) && /* checked */
+ (IS_CHAR(c)) && (c != '<')) &&
+ (ctxt->instate != XML_PARSER_EOF)) {
+ /*
+ * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
+ * special option is given
+ */
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ goto mem_error;
+ }
if (c == 0) break;
if (c == '&') {
in_space = 0;
if (val == '&') {
if (ctxt->replaceEntities) {
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
buf[len++] = '&';
* The reparsing will be done in xmlStringGetNodeList()
* called by the attribute() function in SAX.c
*/
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
buf[len++] = '&';
buf[len++] = ';';
}
} else if (val != 0) {
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
len += xmlCopyChar(0, &buf[len], val);
ctxt->nbentities += ent->owner;
if ((ent != NULL) &&
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
if ((ctxt->replaceEntities == 0) &&
} else {
buf[len++] = ent->content[0];
}
- } else if ((ent != NULL) &&
+ } else if ((ent != NULL) &&
(ctxt->replaceEntities != 0)) {
if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
rep = xmlStringDecodeEntities(ctxt, ent->content,
current++;
} else
buf[len++] = *current++;
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
}
rep = NULL;
}
} else {
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
if (ent->content != NULL)
* entities problems
*/
if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
- (ent->content != NULL)) {
+ (ent->content != NULL) && (ent->checked == 0)) {
+ unsigned long oldnbent = ctxt->nbentities;
+
rep = xmlStringDecodeEntities(ctxt, ent->content,
XML_SUBSTITUTE_REF, 0, 0, 0);
+
+ ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
if (rep != NULL) {
+ if (xmlStrchr(rep, '<'))
+ ent->checked |= 1;
xmlFree(rep);
rep = NULL;
}
* Just output the reference
*/
buf[len++] = '&';
- while (len > buf_size - i - 10) {
+ while (len + i + 10 > buf_size) {
growBuffer(buf, i + 10);
}
for (;i > 0;i--)
if ((len != 0) || (!normalize)) {
if ((!normalize) || (!in_space)) {
COPY_BUF(l,buf,len,0x20);
- while (len > buf_size - 10) {
+ while (len + 10 > buf_size) {
growBuffer(buf, 10);
}
}
} else {
in_space = 0;
COPY_BUF(l,buf,len,c);
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
}
GROW;
c = CUR_CHAR(l);
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto error;
+
if ((in_space) && (normalize)) {
- while (buf[len - 1] == 0x20) len--;
+ while ((len > 0) && (buf[len - 1] == 0x20)) len--;
}
buf[len] = 0;
if (RAW == '<') {
}
} else
NEXT;
- if (attlen != NULL) *attlen = len;
+
+ /*
+ * Converting len to int below could overflow, so reject attribute values
+ * of INT_MAX bytes or more; this is a very reasonable limit.
+ */
+ if (len >= INT_MAX) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ goto mem_error;
+ }
+
+ if (attlen != NULL) *attlen = (int) len;
return(buf);
mem_error:
xmlErrMemory(ctxt, NULL);
+error:
if (buf != NULL)
xmlFree(buf);
if (rep != NULL)
*
* 3.3.3 Attribute-Value Normalization:
* Before the value of an attribute is passed to the application or
- * checked for validity, the XML processor must normalize it as follows:
+ * checked for validity, the XML processor must normalize it as follows:
* - a character reference is processed by appending the referenced
* character to the attribute value
* - an entity reference is processed by recursively processing the
- * replacement text of the entity
+ * replacement text of the entity
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
* appending #x20 to the normalized value, except that only a single
* #x20 is appended for a "#xD#xA" sequence that is part of an external
- * parsed entity or the literal entity value of an internal parsed entity
- * - other characters are processed by appending them to the normalized value
+ * parsed entity or the literal entity value of an internal parsed entity
+ * - other characters are processed by appending them to the normalized value
* If the declared value is not CDATA, then the XML processor must further
* process the normalized attribute value by discarding any leading and
* trailing space (#x20) characters, and by replacing sequences of space
- * (#x20) characters by a single space (#x20) character.
+ * (#x20) characters by a single space (#x20) character.
* All attributes for which no declaration has been read should be treated
* by a non-validating parser as if declared CDATA.
*
/**
* xmlParseSystemLiteral:
* @ctxt: an XML parser context
- *
+ *
* parse an XML Literal
*
* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
return(NULL);
}
-
+
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
if (buf == NULL) {
xmlErrMemory(ctxt, NULL);
if (len + 5 >= size) {
xmlChar *tmp;
+ if ((size > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
+ xmlFree(buf);
+ ctxt->instate = (xmlParserInputState) state;
+ return(NULL);
+ }
size *= 2;
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
if (tmp == NULL) {
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
}
COPY_BUF(l,buf,len,cur);
NEXTL(l);
if (len + 1 >= size) {
xmlChar *tmp;
+ if ((size > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
+ xmlFree(buf);
+ return(NULL);
+ }
size *= 2;
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
if (tmp == NULL) {
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
}
NEXT;
cur = CUR;
* The right angle bracket (>) may be represented using the string ">",
* and must, for compatibility, be escaped using ">" or a character
* reference when it appears in the string "]]>" in content, when that
- * string is not marking the end of a CDATA section.
+ * string is not marking the end of a CDATA section.
*
* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
*/
}
SHRINK;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
in = ctxt->input->cur;
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
nbchar = 0;
GROW;
cur = CUR_CHAR(l);
while ((cur != '<') && /* checked */
- (cur != '&') &&
+ (cur != '&') &&
(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
if ((cur == ']') && (NXT(1) == ']') &&
(NXT(2) == '>')) {
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
}
NEXTL(l);
cur = CUR_CHAR(l);
}
} else {
/*
- * We handle [83] so we return immediately, if
+ * We handle [83] so we return immediately, if
* "S SystemLiteral" is not detected. From a purely parsing
* point of view that's a nice mess.
*/
ptr = CUR_PTR;
if (!IS_BLANK_CH(*ptr)) return(NULL);
-
+
while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
}
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
*/
static void
-xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
+xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
+ size_t len, size_t size) {
int q, ql;
int r, rl;
int cur, l;
- int count = 0;
+ size_t count = 0;
int inputid;
inputid = ctxt->input->id;
if ((r == '-') && (q == '-')) {
xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
}
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
+ "Comment too big found", NULL);
+ xmlFree (buf);
+ return;
+ }
if (len + 5 >= size) {
xmlChar *new_buf;
- size *= 2;
- new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+ size_t new_size;
+
+ new_size = size * 2;
+ new_buf = (xmlChar *) xmlRealloc(buf, new_size);
if (new_buf == NULL) {
xmlFree (buf);
xmlErrMemory(ctxt, NULL);
return;
}
buf = new_buf;
+ size = new_size;
}
COPY_BUF(ql,buf,len,q);
q = r;
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
}
NEXTL(l);
cur = CUR_CHAR(l);
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
- int size = XML_PARSER_BUFFER_SIZE;
- int len = 0;
+ size_t size = XML_PARSER_BUFFER_SIZE;
+ size_t len = 0;
xmlParserInputState state;
const xmlChar *in;
- int nbchar = 0, ccol;
+ size_t nbchar = 0;
+ int ccol;
int inputid;
/*
buf[len] = 0;
}
}
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
+ "Comment too big found", NULL);
+ xmlFree (buf);
+ return;
+ }
ctxt->input->cur = in;
if (*in == 0xA) {
in++;
}
SHRINK;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
in = ctxt->input->cur;
if (*in == '-') {
if (in[1] == '-') {
}
if (buf != NULL)
xmlFree(buf);
- ctxt->instate = state;
+ if (ctxt->instate != XML_PARSER_EOF)
+ ctxt->instate = state;
return;
}
if (buf != NULL) {
/**
* xmlParsePITarget:
* @ctxt: an XML parser context
- *
+ *
* parse the name of a PI
*
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
NULL, NULL);
}
if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
- xmlNsErr(ctxt, XML_NS_ERR_COLON,
+ xmlNsErr(ctxt, XML_NS_ERR_COLON,
"colon are forbidden from PI names '%s'\n", name, NULL, NULL);
}
return(name);
* xmlParseCatalogPI:
* @ctxt: an XML parser context
* @catalog: the PI value string
- *
+ *
* parse an XML Catalog Processing Instruction.
*
* <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
/**
* xmlParsePI:
* @ctxt: an XML parser context
- *
+ *
* parse an XML Processing Instruction.
*
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
void
xmlParsePI(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
- int len = 0;
- int size = XML_PARSER_BUFFER_SIZE;
+ size_t len = 0;
+ size_t size = XML_PARSER_BUFFER_SIZE;
int cur, l;
const xmlChar *target;
xmlParserInputState state;
((cur != '?') || (NXT(1) != '>'))) {
if (len + 5 >= size) {
xmlChar *tmp;
-
- size *= 2;
- tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+ size_t new_size = size * 2;
+ tmp = (xmlChar *) xmlRealloc(buf, new_size);
if (tmp == NULL) {
xmlErrMemory(ctxt, NULL);
xmlFree(buf);
return;
}
buf = tmp;
+ size = new_size;
}
count++;
if (count > 50) {
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
count = 0;
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
+ "PI %s too big found", target);
+ xmlFree(buf);
+ ctxt->instate = state;
+ return;
+ }
}
COPY_BUF(l,buf,len,cur);
NEXTL(l);
cur = CUR_CHAR(l);
}
}
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
+ "PI %s too big found", target);
+ xmlFree(buf);
+ ctxt->instate = state;
+ return;
+ }
buf[len] = 0;
if (cur != '?') {
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
const xmlChar *name;
xmlChar *Pubid;
xmlChar *Systemid;
-
+
if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
xmlParserInputPtr input = ctxt->input;
SHRINK;
return;
}
if (xmlStrchr(name, ':') != NULL) {
- xmlNsErr(ctxt, XML_NS_ERR_COLON,
+ xmlNsErr(ctxt, XML_NS_ERR_COLON,
"colon are forbidden from notation names '%s'\n",
name, NULL, NULL);
}
int isParameter = 0;
xmlChar *orig = NULL;
int skipped;
-
+
/* GROW; done in the caller */
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
xmlParserInputPtr input = ctxt->input;
return;
}
if (xmlStrchr(name, ':') != NULL) {
- xmlNsErr(ctxt, XML_NS_ERR_COLON,
+ xmlNsErr(ctxt, XML_NS_ERR_COLON,
"colon are forbidden from entities names '%s'\n",
name, NULL, NULL);
}
}
}
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
SKIP_BLANKS;
if (RAW != '>') {
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
*
* [ VC: Fixed Attribute Default ]
* if an attribute has a default value declared with the #FIXED
- * keyword, instances of that attribute must match the default value.
+ * keyword, instances of that attribute must match the default value.
*
* [ WFC: No < in Attribute Values ]
* handled in xmlParseAttValue()
*
* returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
- * or XML_ATTRIBUTE_FIXED.
+ * or XML_ATTRIBUTE_FIXED.
*/
int
*
* [ VC: Notation Attributes ]
* Values of this type must match one of the notation names included
- * in the declaration; all notation names in the declaration must be declared.
+ * in the declaration; all notation names in the declaration must be declared.
*
* Returns: the notation attribute tree built while parsing
*/
* [ VC: Entity Name ]
* Values of type ENTITY must match the Name production, values
* of type ENTITIES must match Names; each Entity Name must match the
- * name of an unparsed entity declared in the DTD.
+ * name of an unparsed entity declared in the DTD.
*
* [ VC: Name Token ]
* Values of type NMTOKEN must match the Nmtoken production; values
- * of type NMTOKENS must match Nmtokens.
+ * of type NMTOKENS must match Nmtokens.
*
* Returns the attribute type
*/
-int
+int
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
SHRINK;
if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
}
SKIP_BLANKS;
GROW;
- while (RAW != '>') {
+ while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *check = CUR_PTR;
int type;
int def;
xmlFreeEnumeration(tree);
if ((ctxt->sax2) && (defaultValue != NULL) &&
- (def != XML_ATTRIBUTE_IMPLIED) &&
+ (def != XML_ATTRIBUTE_IMPLIED) &&
(def != XML_ATTRIBUTE_REQUIRED)) {
xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
}
*
* parse the declaration for a Mixed Element content
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
- *
+ *
* [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
* '(' S? '#PCDATA' S? ')'
*
*
* [ VC: No Duplicate Types ]
* The same name must not appear more than once in a single
- * mixed-content declaration.
+ * mixed-content declaration.
*
* returns: the list of the xmlElementContentPtr describing the element choices
*/
ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
if (ret == NULL) return(NULL);
}
- while (RAW == '|') {
+ while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
NEXT;
if (elem == NULL) {
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
*
* parse the declaration for a Mixed Element content
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
- *
+ *
*
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
*
* be empty, and neither the first nor last non-blank character of
* the replacement text should be a connector (| or ,).
*
- * Returns the tree of xmlElementContentPtr describing the element
+ * Returns the tree of xmlElementContentPtr describing the element
* hierarchy.
*/
static xmlElementContentPtr
}
SKIP_BLANKS;
SHRINK;
- while (RAW != ')') {
+ while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
/*
* Each loop we parse one separator and one element.
*/
*
* parse the declaration for an Element content either Mixed or Children,
* the cases EMPTY and ANY are handled directly in xmlParseElementDecl
- *
+ *
* [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
*
* returns: the type of element content XML_ELEMENT_TYPE_xxx
}
NEXT;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
SKIP_BLANKS;
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
tree = xmlParseElementMixedContentDecl(ctxt, inputid);
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element declaration doesn't start and stop in the same entity\n");
}
-
+
NEXT;
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
(ctxt->sax->elementDecl != NULL)) {
/*
* this is a trick: if xmlAddElementDecl is called,
* instead of copying the full tree it is plugged directly
- * if called from the parser. Avoid duplicating the
+ * if called from the parser. Avoid duplicating the
* interfaces or change the API/ABI
*/
xmlFreeDocElementContent(ctxt->myDoc, content);
* xmlParseConditionalSections
* @ctxt: an XML parser context
*
- * [61] conditionalSect ::= includeSect | ignoreSect
- * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
+ * [61] conditionalSect ::= includeSect | ignoreSect
+ * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
* [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
* [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
"Entering INCLUDE Conditional Section\n");
}
- while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
- (NXT(2) != '>'))) {
+ while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
+ (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *check = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
ctxt->instate = XML_PARSER_IGNORE;
- while ((depth >= 0) && (RAW != 0)) {
+ while (((depth >= 0) && (RAW != 0)) &&
+ (ctxt->instate != XML_PARSER_EOF)) {
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
depth++;
SKIP(3);
/**
* xmlParseMarkupDecl:
* @ctxt: an XML parser context
- *
+ *
* parse Markup declarations
*
* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
* In the internal DTD subset, parameter-entity references can occur
* only where markup declarations can occur, not within markup declarations.
* (This does not apply to references that occur in external parameter
- * entities or to the external subset.)
+ * entities or to the external subset.)
*/
void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
* @ctxt: an XML parser context
* @ExternalID: the external identifier
* @SystemID: the system identifier (or URL)
- *
+ *
* parse Markup declarations from an external subset
*
* [30] extSubset ::= textDecl? extSubsetDecl
break;
}
}
-
+
if (RAW != 0) {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
}
* The first reference to the entity trigger a parsing phase
* where the ent->children is filled with the result from
* the parsing.
- */
- if (ent->checked == 0) {
+ * Note: external parsed entities are not loaded unless the validating
+ * or entity-substituting parser options were given; a non-validating
+ * parser is not required to load them. Not loading them is far more
+ * secure, since by default the parser then only processes data coming
+ * from the document entity.
+ */
+ if ((ent->checked == 0) &&
+ ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
+ (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
unsigned long oldnbent = ctxt->nbentities;
/*
* Store the number of entities needing parsing for this entity
* content and do checkings
*/
- ent->checked = ctxt->nbentities - oldnbent;
+ ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
+ if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
+ ent->checked |= 1;
if (ret == XML_ERR_ENTITY_LOOP) {
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
xmlFreeNodeList(list);
return;
}
- if (xmlParserEntityCheck(ctxt, 0, ent)) {
+ if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
xmlFreeNodeList(list);
return;
}
list = NULL;
}
if (ent->checked == 0)
- ent->checked = 1;
+ ent->checked = 2;
} else if (ent->checked != 1) {
- ctxt->nbentities += ent->checked;
+ ctxt->nbentities += ent->checked / 2;
}
/*
* Seems we are generating the DOM content, do
* a simple tree copy for all references except the first
* In the first occurrence list contains the replacement.
- * progressive == 2 means we are operating on the Reader
- * and since nodes are discarded we must copy all the time.
*/
if (((list == NULL) && (ent->owner == 0)) ||
(ctxt->parseMode == XML_PARSE_READER)) {
xmlNodePtr nw = NULL, cur, firstChild = NULL;
/*
+ * We are copying here, make sure there is no abuse
+ */
+ ctxt->sizeentcopy += ent->length;
+ if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
+ return;
+
+ /*
* when operating on a reader, the entities definitions
* are always owning the entities subtree.
if (ctxt->parseMode == XML_PARSE_READER)
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
- } else if (list == NULL) {
+ } else if ((list == NULL) || (ctxt->inputNr > 0)) {
xmlNodePtr nw = NULL, cur, next, last,
firstChild = NULL;
+
+ /*
+ * We are copying here, make sure there is no abuse
+ */
+ ctxt->sizeentcopy += ent->length;
+ if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
+ return;
+
/*
* Copy the entity child list and make it the new
* entity child list. The goal is to make sure any
xmlEntityPtr ent = NULL;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (RAW != '&')
return(NULL);
NEXT;
/*
- * Predefined entites override any extra definition
+ * Predefined entities override any extra definition
*/
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
ent = xmlGetPredefinedEntity(name);
}
/*
- * Increate the number of entity references parsed
+ * Increase the number of entity references parsed
*/
ctxt->nbentities++;
if (ctxt->sax != NULL) {
if (ctxt->sax->getEntity != NULL)
ent = ctxt->sax->getEntity(ctxt->userData, name);
- if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
+ if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
(ctxt->options & XML_PARSE_OLDSAX))
ent = xmlGetPredefinedEntity(name);
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
ent = xmlSAX2GetEntity(ctxt, name);
}
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
/*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an
* [ WFC: No < in Attribute Values ]
* The replacement text of any entity referred to directly or
* indirectly in an attribute value (other than "<") must
- * not contain a <.
+ * not contain a <.
*/
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
- (ent != NULL) && (ent->content != NULL) &&
- (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
- (xmlStrchr(ent->content, '<'))) {
- xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
- "'<' in entity '%s' is not allowed in attributes values\n", name);
+ (ent != NULL) &&
+ (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
+ if ((ent->checked & 1) || ((ent->checked == 0) &&
+ (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
+ "'<' in entity '%s' is not allowed in attributes values\n", name);
+ }
}
/*
/*
* [ WFC: No Recursion ]
* A parsed entity must not contain a recursive reference
- * to itself, either directly or indirectly.
+ * to itself, either directly or indirectly.
* Done somewhere else
*/
return(ent);
ent = xmlSAX2GetEntity(ctxt, name);
}
}
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(name);
+ return(NULL);
+ }
/*
* [ WFC: Entity Declared ]
* is not obligated to read and process their declarations;
* for such documents, the rule that an entity must be
* declared is a well-formedness constraint only if
- * standalone='yes'.
+ * standalone='yes'.
*/
if (ent == NULL) {
if ((ctxt->standalone == 1) ||
*
* [ WFC: No Recursion ]
* A parsed entity must not contain a recursive
- * reference to itself, either directly or indirectly.
+ * reference to itself, either directly or indirectly.
*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an internal DTD
*/
if ((ctxt->sax != NULL) &&
(ctxt->sax->getParameterEntity != NULL))
- entity = ctxt->sax->getParameterEntity(ctxt->userData,
- name);
+ entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
if (entity == NULL) {
/*
* [ WFC: Entity Declared ]
while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
(IS_CHAR(c))) {
xmlBufferAdd(buf, ctxt->input->cur, l);
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlBufferFree(buf);
+ return(-1);
+ }
}
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlBufferFree(buf);
+ return(-1);
+ }
+ c = CUR_CHAR(l);
+ }
}
if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
*/
if ((ctxt->sax != NULL) &&
(ctxt->sax->getParameterEntity != NULL))
- entity = ctxt->sax->getParameterEntity(ctxt->userData,
- name);
+ entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(name);
+ return(NULL);
+ }
if (entity == NULL) {
/*
* [ WFC: Entity Declared ]
*
* parse a DOCTYPE declaration
*
- * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
+ * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
* ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
*
* [ VC: Root Element Type ]
* The Name in the document type declaration must match the element
- * type of the root element.
+ * type of the root element.
*/
void
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
(!ctxt->disableSAX))
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
/*
* Is there any internal subset declarations ?
ctxt->instate = XML_PARSER_DTD;
NEXT;
/*
- * Parse the succession of Markup declarations and
+ * Parse the succession of Markup declarations and
* PEReferences.
* Subsequence (markupdecl | PEReference | S)*
*/
- while (RAW != ']') {
+ while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *check = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
break;
}
}
- if (RAW == ']') {
+ if (RAW == ']') {
NEXT;
SKIP_BLANKS;
}
*
* [ WFC: No < in Attribute Values ]
* The replacement text of any entity referred to directly or indirectly in
- * an attribute value (other than "<") must not contain a <.
- *
+ * an attribute value (other than "<") must not contain a <.
+ *
* [ VC: Attribute Value Type ]
* The attribute must have been declared; the value must be of the type
* declared for it.
/**
* xmlParseStartTag:
* @ctxt: an XML parser context
- *
+ *
* parse a start of tag either for rule element or
* EmptyElement. In both case we don't parse the tag closing chars.
*
*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
+ * empty-element tag.
*
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
+ * empty-element tag.
*
* With namespace:
*
SKIP_BLANKS;
GROW;
- while ((RAW != '>') &&
+ while (((RAW != '>') &&
((RAW != '/') || (NXT(1) != '>')) &&
- (IS_BYTE_CHAR(RAW))) {
+ (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *q = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
/*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same
- * start-tag or empty-element tag.
+ * start-tag or empty-element tag.
*/
for (i = 0; i < nbatts;i += 2) {
if (xmlStrEqual(atts[i], attname)) {
xmlFree(attvalue);
}
-failed:
+failed:
GROW
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
/*
* [ WFC: Element Type Match ]
* The Name in an element's end-tag must match the element type in the
- * start-tag.
+ * start-tag.
*
*/
if (name != (xmlChar*)1) {
if (CUR == ':') {
l = xmlParseName(ctxt);
if (l != NULL) {
- xmlNsErr(ctxt, XML_NS_ERR_QNAME,
+ xmlNsErr(ctxt, XML_NS_ERR_QNAME,
"Failed to parse QName '%s'\n", l, NULL, NULL);
*prefix = NULL;
return(l);
cmp = prefix;
while (*in != 0 && *in == *cmp) {
- ++in;
+ ++in;
++cmp;
}
if ((*cmp == 0) && (*in == ':')) {
*
* 3.3.3 Attribute-Value Normalization:
* Before the value of an attribute is passed to the application or
- * checked for validity, the XML processor must normalize it as follows:
+ * checked for validity, the XML processor must normalize it as follows:
* - a character reference is processed by appending the referenced
* character to the attribute value
* - an entity reference is processed by recursively processing the
- * replacement text of the entity
+ * replacement text of the entity
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
* appending #x20 to the normalized value, except that only a single
* #x20 is appended for a "#xD#xA" sequence that is part of an external
- * parsed entity or the literal entity value of an internal parsed entity
- * - other characters are processed by appending them to the normalized value
+ * parsed entity or the literal entity value of an internal parsed entity
+ * - other characters are processed by appending them to the normalized value
* If the declared value is not CDATA, then the XML processor must further
* process the normalized attribute value by discarding any leading and
* trailing space (#x20) characters, and by replacing sequences of space
- * (#x20) characters by a single space (#x20) character.
+ * (#x20) characters by a single space (#x20) character.
* All attributes for which no declaration has been read should be treated
* by a non-validating parser as if declared CDATA.
*
/*
* Skip any leading spaces
*/
- while ((in < end) && (*in != limit) &&
+ while ((in < end) && (*in != limit) &&
((*in == 0x20) || (*in == 0x9) ||
(*in == 0xA) || (*in == 0xD))) {
in++;
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
in = in + delta;
}
end = ctxt->input->end;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ return(NULL);
+ }
}
}
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
in = in + delta;
}
end = ctxt->input->end;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ return(NULL);
+ }
}
}
last = in;
* skip the trailing blanks
*/
while ((last[-1] == 0x20) && (last > start)) last--;
- while ((in < end) && (*in != limit) &&
+ while ((in < end) && (*in != limit) &&
((*in == 0x20) || (*in == 0x9) ||
(*in == 0xA) || (*in == 0xD))) {
in++;
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
last = last + delta;
}
end = ctxt->input->end;
- }
- }
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ return(NULL);
+ }
+ }
+ }
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ return(NULL);
+ }
if (*in != limit) goto need_complex;
} else {
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
in = in + delta;
}
end = ctxt->input->end;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ return(NULL);
+ }
}
}
last = in;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue length too long\n");
+ return(NULL);
+ }
if (*in != limit) goto need_complex;
}
in++;
/**
* xmlParseStartTag2:
* @ctxt: an XML parser context
- *
+ *
* parse a start of tag either for rule element or
* EmptyElement. In both case we don't parse the tag closing chars.
* This routine is called when running SAX2 parsing
*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
+ * empty-element tag.
*
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
+ * empty-element tag.
*
* With namespace:
*
GROW;
if (ctxt->input->base != base) goto base_changed;
- while ((RAW != '>') &&
+ while (((RAW != '>') &&
((RAW != '/') || (NXT(1) != '>')) &&
- (IS_BYTE_CHAR(RAW))) {
+ (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *q = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
int len = -1, alloc = 0;
failed:
GROW
+ if (ctxt->instate == XML_PARSER_EOF)
+ break;
if (ctxt->input->base != base) goto base_changed;
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
break;
atts[nbatts++] = defaults->values[5 * i + 3];
if ((ctxt->standalone == 1) &&
(defaults->values[5 * i + 4] != NULL)) {
- xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
+ xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
"standalone: attribute %s on %s defaulted from external subset\n",
attname, localname);
}
/*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same
- * start-tag or empty-element tag.
+ * start-tag or empty-element tag.
* As extended by the Namespace in XML REC.
*/
for (j = 0; j < i;j += 5) {
* We should definitely be at the ending "S? '>'" part
*/
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
SKIP_BLANKS;
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
/*
* [ WFC: Element Type Match ]
* The Name in an element's end-tag must match the element type in the
- * start-tag.
+ * start-tag.
*
*/
if (name != (xmlChar*)1) {
/**
* xmlParseCDSect:
* @ctxt: an XML parser context
- *
+ *
* Parse escaped pure raw content.
*
* [18] CDSect ::= CDStart CData CDEnd
if (len + 5 >= size) {
xmlChar *tmp;
- size *= 2;
- tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+ if ((size > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
+ "CData section too big found", NULL);
+ xmlFree (buf);
+ return;
+ }
+ tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
if (tmp == NULL) {
xmlFree(buf);
xmlErrMemory(ctxt, NULL);
return;
}
buf = tmp;
+ size *= 2;
}
COPY_BUF(rl,buf,len,r);
r = s;
count++;
if (count > 50) {
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
count = 0;
}
NEXTL(l);
/*
* Fifth case : a reference. If if has not been resolved,
- * parsing returns it's Name, create the node
+ * parsing returns it's Name, create the node
*/
else if (*cur == '&') {
*
* [ WFC: Element Type Match ]
* The Name in an element's end-tag must match the element type in the
- * start-tag.
+ * start-tag.
*
*/
/*
* [ VC: Root Element Type ]
* The Name in the document type declaration must match the element
- * type of the root element.
+ * type of the root element.
*/
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
ctxt->node && (ctxt->node == ctxt->myDoc->children))
* Parse the content of the element:
*/
xmlParseContent(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
if (!IS_BYTE_CHAR(RAW)) {
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
"Premature end of data in tag %s line %d\n",
/**
* xmlParseEncodingDecl:
* @ctxt: an XML parser context
- *
+ *
* parse the XML encoding declaration
*
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
/*
* If no encoding was passed to the parser, that we are
- * using UTF-16 and no decoder is present i.e. the
+ * using UTF-16 and no decoder is present i.e. the
* document is apparently UTF-8 compatible, then raise an
* encoding mismatch fatal error
*/
* parse the XML standalone declaration
*
* [32] SDDecl ::= S 'standalone' Eq
- * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
+ * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
*
* [ VC: Standalone Document Declaration ]
* TODO The standalone document declaration must have the value "no"
/**
* xmlParseXMLDecl:
* @ctxt: an XML parser context
- *
+ *
* parse an XML declaration header
*
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
/**
* xmlParseMisc:
* @ctxt: an XML parser context
- *
+ *
* parse an XML Misc* optional field.
*
* [27] Misc ::= Comment | PI | S
void
xmlParseMisc(xmlParserCtxtPtr ctxt) {
- while (((RAW == '<') && (NXT(1) == '?')) ||
- (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
- IS_BLANK_CH(CUR)) {
+ while ((ctxt->instate != XML_PARSER_EOF) &&
+ (((RAW == '<') && (NXT(1) == '?')) ||
+ (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
+ IS_BLANK_CH(CUR))) {
if ((RAW == '<') && (NXT(1) == '?')) {
xmlParsePI(ctxt);
} else if (IS_BLANK_CH(CUR)) {
/**
* xmlParseDocument:
* @ctxt: an XML parser context
- *
+ *
* parse an XML document (and build a tree if using the standard SAX
* interface).
*
*/
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
if ((ctxt->encoding == NULL) &&
((ctxt->input->end - ctxt->input->cur) >= 4)) {
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
}
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
ctxt->sax->startDocument(ctxt->userData);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
/*
* The Misc part of the Prolog
if (RAW == '[') {
ctxt->instate = XML_PARSER_DTD;
xmlParseInternalSubset(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
}
/*
(!ctxt->disableSAX))
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
ctxt->extSubSystem, ctxt->extSubURI);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
ctxt->inSubset = 0;
xmlCleanSpecialAttr(ctxt);
/**
* xmlParseExtParsedEnt:
* @ctxt: an XML parser context
- *
+ *
* parse a general parsed entity
* An external general parsed entity is well-formed if it matches the
* production labeled extParsedEnt.
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
}
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
ctxt->sax->startDocument(ctxt->userData);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
/*
* Doing validity checking on chunk doesn't make sense
ctxt->depth = 0;
xmlParseContent(ctxt);
-
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
+
if ((RAW == '<') && (NXT(1) == '/')) {
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
} else if (RAW != 0) {
#ifdef LIBXML_PUSH_ENABLED
/************************************************************************
* *
- * Progressive parsing interfaces *
+ * Progressive parsing interfaces *
* *
************************************************************************/
buf = in->base;
len = in->length;
} else {
- buf = in->buf->buffer->content;
- len = in->buf->buffer->use;
+ buf = xmlBufContent(in->buf->buffer);
+ len = xmlBufUse(in->buf->buffer);
}
/* take into account the sequence length */
if (third) len -= 2;
xmlGenericError(xmlGenericErrorContext,
"PP: lookup '%c%c' found at %d\n",
first, next, base);
- else
+ else
xmlGenericError(xmlGenericErrorContext,
"PP: lookup '%c%c%c' found at %d\n",
first, next, third, base);
else if (third == 0)
xmlGenericError(xmlGenericErrorContext,
"PP: lookup '%c%c' failed\n", first, next);
- else
+ else
xmlGenericError(xmlGenericErrorContext,
"PP: lookup '%c%c%c' failed\n", first, next, third);
#endif
if ((utf == NULL) || (len <= 0))
return(0);
-
+
for (ix = 0; ix < len;) { /* string is 0-terminated */
c = utf[ix];
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
}
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
- while (1) {
+ while (ctxt->instate != XML_PARSER_EOF) {
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
return(0);
-
+
/*
* Pop-up of finished entities.
*/
/*
* If we are operating on converted input, try to flush
* remainng chars to avoid them stalling in the non-converted
- * buffer.
+ * buffer. But do not do this in document start where
+ * encoding="..." may not have been read and we work on a
+ * guessed encoding.
*/
- if ((ctxt->input->buf->raw != NULL) &&
- (ctxt->input->buf->raw->use > 0)) {
- int base = ctxt->input->base -
- ctxt->input->buf->buffer->content;
- int current = ctxt->input->cur - ctxt->input->base;
+ if ((ctxt->instate != XML_PARSER_START) &&
+ (ctxt->input->buf->raw != NULL) &&
+ (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
+ ctxt->input);
+ size_t current = ctxt->input->cur - ctxt->input->base;
xmlParserInputBufferPush(ctxt->input->buf, 0, "");
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + current;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[
- ctxt->input->buf->buffer->use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
+ base, current);
}
- avail = ctxt->input->buf->buffer->use -
+ avail = xmlBufUse(ctxt->input->buf->buffer) -
(ctxt->input->cur - ctxt->input->base);
}
if (avail < 1)
if (avail < 4)
goto done;
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines,
/*
* [ VC: Root Element Type ]
* The Name in the document type declaration must match
- * the element type of the root element.
+ * the element type of the root element.
*/
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
ctxt->node && (ctxt->node == ctxt->myDoc->children))
ctxt->sax->endElement(ctxt->userData, name);
#endif /* LIBXML_SAX1_ENABLED */
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
spacePop(ctxt);
if (ctxt->nameNr == 0) {
ctxt->instate = XML_PARSER_EPILOG;
} else {
ctxt->instate = XML_PARSER_CONTENT;
}
+ ctxt->progressive = 1;
break;
}
if (RAW == '>') {
#endif /* LIBXML_SAX1_ENABLED */
ctxt->instate = XML_PARSER_CONTENT;
+ ctxt->progressive = 1;
break;
}
case XML_PARSER_CONTENT: {
break;
} else if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
+ (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
+ ctxt->progressive = XML_PARSER_PI;
goto done;
+ }
xmlParsePI(ctxt);
+ ctxt->instate = XML_PARSER_CONTENT;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next != '!')) {
ctxt->instate = XML_PARSER_START_TAG;
break;
ctxt->input->cur += 4;
term = xmlParseLookupSequence(ctxt, '-', '-', '>');
ctxt->input->cur -= 4;
- if ((!terminate) && (term < 0))
+ if ((!terminate) && (term < 0)) {
+ ctxt->progressive = XML_PARSER_COMMENT;
goto done;
+ }
xmlParseComment(ctxt);
ctxt->instate = XML_PARSER_CONTENT;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
(ctxt->input->cur[2] == '[') &&
(ctxt->input->cur[3] == 'C') &&
break;
case XML_PARSER_CDATA_SECTION: {
/*
- * The Push mode need to have the SAX callback for
+ * The Push mode need to have the SAX callback for
* cdataBlock merge back contiguous callbacks.
*/
int base;
if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
int tmp;
- tmp = xmlCheckCdataPush(ctxt->input->cur,
+ tmp = xmlCheckCdataPush(ctxt->input->cur,
XML_PARSER_BIG_BUFFER_SIZE);
if (tmp < 0) {
tmp = -tmp;
ctxt->sax->characters(ctxt->userData,
ctxt->input->cur, tmp);
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
SKIPL(tmp);
ctxt->checkIndex = 0;
}
ctxt->sax->characters(ctxt->userData,
ctxt->input->cur, base);
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
SKIPL(base + 3);
ctxt->checkIndex = 0;
ctxt->instate = XML_PARSER_CONTENT;
avail = ctxt->input->length -
(ctxt->input->cur - ctxt->input->base);
else
- avail = ctxt->input->buf->buffer->use -
+ avail = xmlBufUse(ctxt->input->buf->buffer) -
(ctxt->input->cur - ctxt->input->base);
if (avail < 2)
goto done;
next = ctxt->input->cur[1];
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
+ (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
+ ctxt->progressive = XML_PARSER_PI;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
#endif
xmlParsePI(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
+ ctxt->instate = XML_PARSER_MISC;
+ ctxt->progressive = 1;
ctxt->checkIndex = 0;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') &&
(ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
+ ctxt->progressive = XML_PARSER_COMMENT;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
#endif
xmlParseComment(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
ctxt->instate = XML_PARSER_MISC;
+ ctxt->progressive = 1;
ctxt->checkIndex = 0;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == 'D') &&
(ctxt->input->cur[7] == 'P') &&
(ctxt->input->cur[8] == 'E')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
+ (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
+ ctxt->progressive = XML_PARSER_DTD;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing internal subset\n");
#endif
ctxt->inSubset = 1;
+ ctxt->progressive = 0;
+ ctxt->checkIndex = 0;
xmlParseDocTypeDecl(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
if (RAW == '[') {
ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
goto done;
} else {
ctxt->instate = XML_PARSER_START_TAG;
- ctxt->progressive = 1;
+ ctxt->progressive = XML_PARSER_START_TAG;
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
if (ctxt->input->buf == NULL)
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
else
- avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
- if (avail < 2)
+ avail = xmlBufUse(ctxt->input->buf->buffer) -
+ (ctxt->input->cur - ctxt->input->base);
+ if (avail < 2)
goto done;
cur = ctxt->input->cur[0];
next = ctxt->input->cur[1];
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
+ (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
+ ctxt->progressive = XML_PARSER_PI;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
#endif
xmlParsePI(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
+ ctxt->instate = XML_PARSER_PROLOG;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
+ ctxt->progressive = XML_PARSER_COMMENT;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
#endif
xmlParseComment(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
ctxt->instate = XML_PARSER_PROLOG;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(avail < 4)) {
goto done;
} else {
ctxt->instate = XML_PARSER_START_TAG;
if (ctxt->progressive == 0)
- ctxt->progressive = 1;
+ ctxt->progressive = XML_PARSER_START_TAG;
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
if (ctxt->input->buf == NULL)
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
else
- avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
+ avail = xmlBufUse(ctxt->input->buf->buffer) -
+ (ctxt->input->cur - ctxt->input->base);
if (avail < 2)
goto done;
cur = ctxt->input->cur[0];
next = ctxt->input->cur[1];
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
+ (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
+ ctxt->progressive = XML_PARSER_PI;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
#endif
xmlParsePI(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
ctxt->instate = XML_PARSER_EPILOG;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
+ ctxt->progressive = XML_PARSER_COMMENT;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
#endif
xmlParseComment(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
ctxt->instate = XML_PARSER_EPILOG;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(avail < 4)) {
goto done;
int base, i;
xmlChar *buf;
xmlChar quote = 0;
+ size_t use;
base = ctxt->input->cur - ctxt->input->base;
if (base < 0) return(0);
if (ctxt->checkIndex > base)
base = ctxt->checkIndex;
- buf = ctxt->input->buf->buffer->content;
- for (;(unsigned int) base < ctxt->input->buf->buffer->use;
- base++) {
+ buf = xmlBufContent(ctxt->input->buf->buffer);
+ use = xmlBufUse(ctxt->input->buf->buffer);
+ for (;(unsigned int) base < use; base++) {
if (quote != 0) {
if (buf[base] == quote)
quote = 0;
- continue;
+ continue;
}
if ((quote == 0) && (buf[base] == '<')) {
int found = 0;
/* special handling of comments */
- if (((unsigned int) base + 4 <
- ctxt->input->buf->buffer->use) &&
+ if (((unsigned int) base + 4 < use) &&
(buf[base + 1] == '!') &&
(buf[base + 2] == '-') &&
(buf[base + 3] == '-')) {
- for (;(unsigned int) base + 3 <
- ctxt->input->buf->buffer->use; base++) {
+ for (;(unsigned int) base + 3 < use; base++) {
if ((buf[base] == '-') &&
(buf[base + 1] == '-') &&
(buf[base + 2] == '>')) {
fprintf(stderr, "%c%c%c%c: ", buf[base],
buf[base + 1], buf[base + 2], buf[base + 3]);
#endif
- if ((unsigned int) base +1 >=
- ctxt->input->buf->buffer->use)
+ if ((unsigned int) base +1 >= use)
break;
if (buf[base + 1] == ']') {
/* conditional crap, skip both ']' ! */
base++;
continue;
}
- for (i = 1;
- (unsigned int) base + i < ctxt->input->buf->buffer->use;
- i++) {
+ for (i = 1; (unsigned int) base + i < use; i++) {
if (buf[base + i] == '>') {
#if 0
fprintf(stderr, "found\n");
fprintf(stderr, "end of stream\n");
#endif
break;
-
+
}
not_end_of_int_subset:
continue; /* for */
/*
* We didn't found the end of the Internal subset
*/
+ if (quote == 0)
+ ctxt->checkIndex = base;
+ else
+ ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
if (next == 0)
xmlGenericError(xmlGenericErrorContext,
goto done;
found_end_int_subset:
+ ctxt->checkIndex = 0;
xmlParseInternalSubset(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
ctxt->inSubset = 2;
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
(ctxt->sax->externalSubset != NULL))
ctxt->extSubSystem, ctxt->extSubURI);
ctxt->inSubset = 0;
xmlCleanSpecialAttr(ctxt);
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
ctxt->instate = XML_PARSER_PROLOG;
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
break;
}
}
-done:
+done:
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
#endif
}
/**
+ * xmlParseCheckTransition:
+ * @ctxt:  an XML parser context
+ * @chunk:  a char array
+ * @size:  the size in bytes of the chunk
+ *
+ * Decide, from the current parser state, whether the given chunk must be
+ * processed immediately or whether more data is needed before parsing can
+ * advance.  While the parser sits in a start tag, end tag, CDATA section
+ * or DTD, or while a comment, PI or DTD lookup is pending in
+ * ctxt->progressive, processing is requested only if the chunk contains
+ * a '>' byte.  In every other state processing is always requested.
+ *
+ * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
+ */
+static int
+xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
+    int waits_for_gt;
+
+    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
+        return(-1);
+
+    /*
+     * All of these states react to new input the same way, so they are
+     * folded into a single predicate.
+     */
+    waits_for_gt = (ctxt->instate == XML_PARSER_START_TAG) ||
+                   (ctxt->instate == XML_PARSER_END_TAG) ||
+                   (ctxt->instate == XML_PARSER_CDATA_SECTION) ||
+                   (ctxt->instate == XML_PARSER_DTD) ||
+                   (ctxt->progressive == XML_PARSER_COMMENT) ||
+                   (ctxt->progressive == XML_PARSER_PI) ||
+                   (ctxt->progressive == XML_PARSER_DTD);
+
+    if (!waits_for_gt)
+        return(1);
+
+    /* Only a closing '>' in the new data can move the parser forward. */
+    return((memchr(chunk, '>', size) != NULL) ? 1 : 0);
+}
+
+/**
* xmlParseChunk:
* @ctxt: an XML parser context
* @chunk: an char array
int terminate) {
int end_in_lf = 0;
int remain = 0;
+ size_t old_avail = 0;
+ size_t avail = 0;
if (ctxt == NULL)
return(XML_ERR_INTERNAL_ERROR);
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
return(ctxt->errNo);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
if (ctxt->instate == XML_PARSER_START)
xmlDetectSAX2(ctxt);
if ((size > 0) && (chunk != NULL) && (!terminate) &&
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
- int base = ctxt->input->base - ctxt->input->buf->buffer->content;
- int cur = ctxt->input->cur - ctxt->input->base;
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+ size_t cur = ctxt->input->cur - ctxt->input->base;
int res;
+ old_avail = xmlBufUse(ctxt->input->buf->buffer);
/*
* Specific handling if we autodetected an encoding, we should not
* push more than the first line ... which depend on the encoding
remain = 0;
}
}
- res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
+ res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
if (res < 0) {
ctxt->errNo = XML_PARSER_EOF;
ctxt->disableSAX = 1;
return (XML_PARSER_EOF);
}
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + cur;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif
if ((in->encoder != NULL) && (in->buffer != NULL) &&
(in->raw != NULL)) {
int nbchars;
+ size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
+ size_t current = ctxt->input->cur - ctxt->input->base;
- nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
+ nbchars = xmlCharEncInput(in, terminate);
if (nbchars < 0) {
/* TODO 2.6.0 */
xmlGenericError(xmlGenericErrorContext,
"xmlParseChunk: encoder error\n");
return(XML_ERR_INVALID_ENCODING);
}
+ xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
}
}
}
- if (remain != 0)
+ if (remain != 0) {
xmlParseTryOrFinish(ctxt, 0);
- else
- xmlParseTryOrFinish(ctxt, terminate);
+ } else {
+ if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
+ avail = xmlBufUse(ctxt->input->buf->buffer);
+ /*
+ * Depending on the current state, it may not be worth trying to
+ * parse yet: if nothing in the new chunk could trigger a parser
+ * state transition, we need to wait for more data instead.
+ */
+ if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
+ (old_avail == 0) || (avail == 0) ||
+ (xmlParseCheckTransition(ctxt,
+ (const char *)&ctxt->input->base[old_avail],
+ avail - old_avail)))
+ xmlParseTryOrFinish(ctxt, terminate);
+ }
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(ctxt->errNo);
+
+ if ((ctxt->input != NULL) &&
+ (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
+ ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
+ ctxt->instate = XML_PARSER_EOF;
+ }
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
return(ctxt->errNo);
}
if ((end_in_lf == 1) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL)) {
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
+ ctxt->input);
+ size_t current = ctxt->input->cur - ctxt->input->base;
+
xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
+
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
+ base, current);
}
if (terminate) {
/*
* Check for termination
*/
- int avail = 0;
+ int cur_avail = 0;
if (ctxt->input != NULL) {
if (ctxt->input->buf == NULL)
- avail = ctxt->input->length -
- (ctxt->input->cur - ctxt->input->base);
+ cur_avail = ctxt->input->length -
+ (ctxt->input->cur - ctxt->input->base);
else
- avail = ctxt->input->buf->buffer->use -
- (ctxt->input->cur - ctxt->input->base);
+ cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
+ (ctxt->input->cur - ctxt->input->base);
}
-
+
if ((ctxt->instate != XML_PARSER_EOF) &&
(ctxt->instate != XML_PARSER_EPILOG)) {
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
- }
- if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
+ }
+ if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
}
if (ctxt->instate != XML_PARSER_EOF) {
}
ctxt->instate = XML_PARSER_EOF;
}
- return((xmlParserErrors) ctxt->errNo);
+ if (ctxt->wellFormed == 0)
+ return((xmlParserErrors) ctxt->errNo);
+ else
+ return(0);
}
/************************************************************************
* *
- * I/O front end functions to the parser *
+ * I/O front end functions to the parser *
* *
************************************************************************/
*/
xmlParserCtxtPtr
-xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
+xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
const char *chunk, int size, const char *filename) {
xmlParserCtxtPtr ctxt;
xmlParserInputPtr inputStream;
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
if (user_data != NULL)
ctxt->userData = user_data;
- }
+ }
if (filename == NULL) {
ctxt->directory = NULL;
} else {
}
}
inputStream->buf = buf;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- inputStream->end =
- &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
-
+ xmlBufResetInput(inputStream->buf->buffer, inputStream);
inputPush(ctxt, inputStream);
/*
if ((size == 0) || (chunk == NULL)) {
ctxt->charset = XML_CHAR_ENCODING_NONE;
} else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
- int base = ctxt->input->base - ctxt->input->buf->buffer->content;
- int cur = ctxt->input->cur - ctxt->input->base;
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+ size_t cur = ctxt->input->cur - ctxt->input->base;
- xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
+ xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + cur;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif
*
* Blocks further parser processing
*/
-void
+void
xmlStopParser(xmlParserCtxtPtr ctxt) {
if (ctxt == NULL)
return;
ctxt->instate = XML_PARSER_EOF;
+ ctxt->errNo = XML_ERR_USER_STOP;
ctxt->disableSAX = 1;
if (ctxt->input != NULL) {
ctxt->input->cur = BAD_CAST"";
#ifdef LIBXML_VALID_ENABLED
/************************************************************************
* *
- * Front ends when parsing a DTD *
+ * Front ends when parsing a DTD *
* *
************************************************************************/
* @enc: the charset encoding if known
*
* Load and parse a DTD
- *
+ *
* Returns the resulting xmlDtdPtr or NULL in case of error.
* @input will be freed by the function in any case.
*/
/*
* Set-up the SAX context
*/
- if (sax != NULL) {
+ if (sax != NULL) {
if (ctxt->sax != NULL)
xmlFree(ctxt->sax);
ctxt->sax = sax;
if ((enc == XML_CHAR_ENCODING_NONE) &&
((ctxt->input->end - ctxt->input->cur) >= 4)) {
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
}
if (sax != NULL) ctxt->sax = NULL;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
}
* @SystemID: a NAME* containing the URL to the DTD
*
* Load and parse an external subset.
- *
+ *
* Returns the resulting xmlDtdPtr or NULL in case of error.
*/
/*
* Set-up the SAX context
*/
- if (sax != NULL) {
+ if (sax != NULL) {
if (ctxt->sax != NULL)
xmlFree(ctxt->sax);
ctxt->sax = sax;
ctxt->userData = ctxt;
}
-
+
/*
* Canonicalise the system ID
*/
/************************************************************************
* *
- * Front ends when parsing an Entity *
+ * Front ends when parsing an Entity *
* *
************************************************************************/
*/
if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
(!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
- xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
+ xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
"Version mismatch between document and entity\n");
}
}
if (ctxt->lastError.code != XML_ERR_OK)
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
- if (sax != NULL)
+ if (sax != NULL)
ctxt->sax = oldsax;
oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
oldctxt->node_seq.length = ctxt->node_seq.length;
* @filename: the filename or URL
* @options: a combination of xmlParserOption
*
- * Create a parser context for a file or URL content.
+ * Create a parser context for a file or URL content.
* Automatic support for ZLIB/Compress compressed document is provided
* by default if found at compile-time and for file accesses
*
* xmlCreateFileParserCtxt:
* @filename: the filename
*
- * Create a parser context for a file content.
+ * Create a parser context for a file content.
* Automatic support for ZLIB/Compress compressed document is provided
* by default if found at compile-time.
*
if (sax != NULL)
ctxt->sax = NULL;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
}
xmlClearParserCtxt(ctxt);
return;
}
-
+
xmlClearParserCtxt(ctxt);
if (filename != NULL)
input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
*
* parse an XML file and call the given SAX handler routines.
* Automatic support for ZLIB/Compress compressed document is provided
- *
+ *
* Returns 0 in case of success or a error number otherwise
*/
int
const char *filename) {
int ret = 0;
xmlParserCtxtPtr ctxt;
-
+
ctxt = xmlCreateFileParserCtxt(filename);
if (ctxt == NULL) return -1;
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
if (user_data != NULL)
ctxt->userData = user_data;
-
+
xmlParseDocument(ctxt);
-
+
if (ctxt->wellFormed)
ret = 0;
else {
ctxt->myDoc = NULL;
}
xmlFreeParserCtxt(ctxt);
-
+
return ret;
}
#endif /* LIBXML_SAX1_ENABLED */
/************************************************************************
* *
- * Front ends when parsing from memory *
+ * Front ends when parsing from memory *
* *
************************************************************************/
input->filename = NULL;
input->buf = buf;
- input->base = input->buf->buffer->content;
- input->cur = input->buf->buffer->content;
- input->end = &input->buf->buffer->content[input->buf->buffer->use];
+ xmlBufResetInput(input->buf->buffer, input);
inputPush(ctxt, input);
return(ctxt);
xmlFreeDoc(ctxt->myDoc);
ctxt->myDoc = NULL;
}
- if (sax != NULL)
+ if (sax != NULL)
ctxt->sax = NULL;
xmlFreeParserCtxt(ctxt);
* parse an XML in-memory block and use the given SAX function block
* to handle the parsing callback. If sax is NULL, fallback to the default
* DOM tree building routines.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
* @size: the size of the array
*
* parse an XML in-memory block and build a tree.
- *
+ *
* Returns the resulting document tree
*/
ctxt->userData = user_data;
xmlParseDocument(ctxt);
-
+
if (ctxt->wellFormed)
ret = 0;
else {
ctxt->myDoc = NULL;
}
xmlFreeParserCtxt(ctxt);
-
+
return ret;
}
#endif /* LIBXML_SAX1_ENABLED */
* parse an XML in-memory document and build a tree.
* It use the given SAX function block to handle the parsing callback.
* If sax is NULL, fallback to the default DOM tree building routines.
- *
+ *
* Returns the resulting document tree
*/
ctxt = xmlCreateDocParserCtxt(cur);
if (ctxt == NULL) return(NULL);
- if (sax != NULL) {
+ if (sax != NULL) {
oldsax = ctxt->sax;
ctxt->sax = sax;
ctxt->userData = NULL;
if (sax != NULL)
ctxt->sax = oldsax;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
}
* @cur: a pointer to an array of xmlChar
*
* parse an XML in-memory document and build a tree.
- *
+ *
* Returns the resulting document tree
*/
#ifdef LIBXML_LEGACY_ENABLED
/************************************************************************
* *
- * Specific function to keep track of entities references *
- * and used by the XSLT debugger *
+ * Specific function to keep track of entities references *
+ * and used by the XSLT debugger *
* *
************************************************************************/
* xmlAddEntityReference:
* @ent : A valid entity
* @firstNode : A valid first node for children of entity
- * @lastNode : A valid last node of children entity
+ * @lastNode : A valid last node of children entity
*
* Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
*/
/************************************************************************
* *
- * Miscellaneous *
+ * Miscellaneous *
* *
************************************************************************/
* current scope
*/
#define DICT_FREE(str) \
- if ((str) && ((!dict) || \
+ if ((str) && ((!dict) || \
(xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
xmlFree((char *)(str));
{
xmlParserInputPtr input;
xmlDictPtr dict;
-
+
if (ctxt == NULL)
return;
ctxt->catalogs = NULL;
ctxt->nbentities = 0;
ctxt->sizeentities = 0;
+ ctxt->sizeentcopy = 0;
xmlInitNodeInfoSeq(&ctxt->node_seq);
if (ctxt->attsDefault != NULL) {
inputStream->filename = (char *)
xmlCanonicPath((const xmlChar *) filename);
inputStream->buf = buf;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- inputStream->end =
- &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
+ xmlBufResetInput(buf->buffer, inputStream);
inputPush(ctxt, inputStream);
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL)) {
- int base = ctxt->input->base - ctxt->input->buf->buffer->content;
- int cur = ctxt->input->cur - ctxt->input->base;
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+ size_t cur = ctxt->input->cur - ctxt->input->base;
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + cur;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
- use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif
if (options & XML_PARSE_HUGE) {
ctxt->options |= XML_PARSE_HUGE;
options -= XML_PARSE_HUGE;
+ if (ctxt->dict != NULL)
+ xmlDictSetLimit(ctxt->dict, 0);
}
if (options & XML_PARSE_OLDSAX) {
ctxt->options |= XML_PARSE_OLDSAX;
ctxt->options |= XML_PARSE_IGNORE_ENC;
options -= XML_PARSE_IGNORE_ENC;
}
+ if (options & XML_PARSE_BIG_LINES) {
+ ctxt->options |= XML_PARSE_BIG_LINES;
+ options -= XML_PARSE_BIG_LINES;
+ }
ctxt->linenumbers = 1;
return (options);
}
* @options: a combination of xmlParserOption
*
* parse an XML in-memory document and build a tree.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
* @options: a combination of xmlParserOption
*
* parse an XML file from the filesystem or the network.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
* @options: a combination of xmlParserOption
*
* parse an XML in-memory document and build a tree.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
* parse an XML from a file descriptor and build a tree.
* NOTE that the file descriptor will not be closed when the
* reader is closed or reset.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
*
* parse an XML file from the filesystem or the network.
* This reuses the existing @ctxt parser context
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
*
* parse an XML in-memory document and build a tree.
* This reuses the existing @ctxt parser context
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
* This reuses the existing @ctxt parser context
* NOTE that the file descriptor will not be closed when the
* reader is closed or reset.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr