#include <libxml/globals.h>
#include <libxml/chvalid.h>
+#include "buf.h"
+#include "enc.h"
+
/*
* Various global defaults for parsing
*/
xmlInitParser();
if ((myversion / 10000) != (version / 10000)) {
- xmlGenericError(xmlGenericErrorContext,
+ xmlGenericError(xmlGenericErrorContext,
"Fatal: program compiled against libxml %d using libxml %d\n",
(version / 10000), (myversion / 10000));
- fprintf(stderr,
+ fprintf(stderr,
"Fatal: program compiled against libxml %d using libxml %d\n",
(version / 10000), (myversion / 10000));
}
if ((myversion / 100) < (version / 100)) {
- xmlGenericError(xmlGenericErrorContext,
+ xmlGenericError(xmlGenericErrorContext,
"Warning: program compiled against libxml %d using older %d\n",
(version / 100), (myversion / 100));
}
/************************************************************************
* *
- * Some factorized error routines *
+ * Some factorized error routines *
* *
************************************************************************/
/************************************************************************
* *
- * Input handling functions for progressive parsing *
+ * Input handling functions for progressive parsing *
* *
************************************************************************/
static
void check_buffer(xmlParserInputPtr in) {
- if (in->base != in->buf->buffer->content) {
+ if (in->base != xmlBufContent(in->buf->buffer)) {
xmlGenericError(xmlGenericErrorContext,
"xmlParserInput: base mismatch problem\n");
}
xmlGenericError(xmlGenericErrorContext,
"xmlParserInput: cur < base problem\n");
}
- if (in->cur > in->base + in->buf->buffer->use) {
+ if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
xmlGenericError(xmlGenericErrorContext,
"xmlParserInput: cur > base + use problem\n");
}
- xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
- (int) in, (int) in->buf->buffer->content, in->cur - in->base,
- in->buf->buffer->use, in->buf->buffer->size);
+ xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n",
+ (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base,
+ xmlBufUse(in->buf->buffer));
}
#else
-#define CHECK_BUFFER(in)
+#define CHECK_BUFFER(in)
#endif
* @in: an XML parser input
* @len: an indicative size for the lookahead
*
- * This function refresh the input for the parser. It doesn't try to
- * preserve pointers to the input buffer, and discard already read data
+ * This function was internal and is deprecated.
*
- * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
- * end of this entity
+ * Returns -1 as this is an error to use it.
*/
int
-xmlParserInputRead(xmlParserInputPtr in, int len) {
- int ret;
- int used;
- int indx;
-
- if (in == NULL) return(-1);
-#ifdef DEBUG_INPUT
- xmlGenericError(xmlGenericErrorContext, "Read\n");
-#endif
- if (in->buf == NULL) return(-1);
- if (in->base == NULL) return(-1);
- if (in->cur == NULL) return(-1);
- if (in->buf->buffer == NULL) return(-1);
- if (in->buf->readcallback == NULL) return(-1);
-
- CHECK_BUFFER(in);
-
- used = in->cur - in->buf->buffer->content;
- ret = xmlBufferShrink(in->buf->buffer, used);
- if (ret > 0) {
- in->cur -= ret;
- in->consumed += ret;
- }
- ret = xmlParserInputBufferRead(in->buf, len);
- if (in->base != in->buf->buffer->content) {
- /*
- * the buffer has been reallocated
- */
- indx = in->cur - in->base;
- in->base = in->buf->buffer->content;
- in->cur = &in->buf->buffer->content[indx];
- }
- in->end = &in->buf->buffer->content[in->buf->buffer->use];
-
- CHECK_BUFFER(in);
-
- return(ret);
+xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
+ return(-1);
}
/**
* This function increase the input for the parser. It tries to
* preserve pointers to the input buffer, and keep already read data
*
- * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
+ * Returns the amount of char read, or -1 in case of error, 0 indicate the
* end of this entity
*/
int
xmlParserInputGrow(xmlParserInputPtr in, int len) {
- int ret;
- int indx;
+ size_t ret;
+ size_t indx;
+ const xmlChar *content;
- if (in == NULL) return(-1);
+ if ((in == NULL) || (len < 0)) return(-1);
#ifdef DEBUG_INPUT
xmlGenericError(xmlGenericErrorContext, "Grow\n");
#endif
CHECK_BUFFER(in);
indx = in->cur - in->base;
- if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
+ if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
CHECK_BUFFER(in);
return(0);
}
- if (in->buf->readcallback != NULL)
+ if (in->buf->readcallback != NULL) {
ret = xmlParserInputBufferGrow(in->buf, len);
- else
+ } else
return(0);
/*
* pointer arithmetic. Insure will raise it as a bug but in
* that specific case, that's not !
*/
- if (in->base != in->buf->buffer->content) {
+
+ content = xmlBufContent(in->buf->buffer);
+ if (in->base != content) {
/*
* the buffer has been reallocated
*/
indx = in->cur - in->base;
- in->base = in->buf->buffer->content;
- in->cur = &in->buf->buffer->content[indx];
+ in->base = content;
+ in->cur = &content[indx];
}
- in->end = &in->buf->buffer->content[in->buf->buffer->use];
+ in->end = xmlBufEnd(in->buf->buffer);
CHECK_BUFFER(in);
*/
void
xmlParserInputShrink(xmlParserInputPtr in) {
- int used;
- int ret;
- int indx;
+ size_t used;
+ size_t ret;
+ size_t indx;
+ const xmlChar *content;
#ifdef DEBUG_INPUT
xmlGenericError(xmlGenericErrorContext, "Shrink\n");
CHECK_BUFFER(in);
- used = in->cur - in->buf->buffer->content;
+ used = in->cur - xmlBufContent(in->buf->buffer);
/*
* Do not shrink on large buffers whose only a tiny fraction
* was consumed
*/
if (used > INPUT_CHUNK) {
- ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
+ ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
if (ret > 0) {
in->cur -= ret;
in->consumed += ret;
}
- in->end = &in->buf->buffer->content[in->buf->buffer->use];
+ in->end = xmlBufEnd(in->buf->buffer);
}
CHECK_BUFFER(in);
- if (in->buf->buffer->use > INPUT_CHUNK) {
+ if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
return;
}
xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
- if (in->base != in->buf->buffer->content) {
+ content = xmlBufContent(in->buf->buffer);
+ if (in->base != content) {
/*
* the buffer has been reallocated
*/
indx = in->cur - in->base;
- in->base = in->buf->buffer->content;
- in->cur = &in->buf->buffer->content[indx];
+ in->base = content;
+ in->cur = &content[indx];
}
- in->end = &in->buf->buffer->content[in->buf->buffer->use];
+ in->end = xmlBufEnd(in->buf->buffer);
CHECK_BUFFER(in);
}
/************************************************************************
* *
- * UTF8 character input and related functions *
+ * UTF8 character input and related functions *
* *
************************************************************************/
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*
* Check for the 0x110000 limit too
*/
if (c & 0x80) {
if (c == 0xC0)
goto encoding_error;
- if (cur[1] == 0)
+ if (cur[1] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if ((cur[1] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xe0) == 0xe0) {
unsigned int val;
- if (cur[2] == 0)
+ if (cur[2] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if ((cur[2] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xf0) == 0xf0) {
- if (cur[3] == 0)
+ if (cur[3] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if (((c & 0xf8) != 0xf0) ||
((cur[3] & 0xc0) != 0x80))
goto encoding_error;
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*
* Check for the 0x110000 limit too
*/
c = *cur;
if (c & 0x80) {
- if (c == 0xC0)
+ if (((c & 0x40) == 0) || (c == 0xC0))
goto encoding_error;
- if (cur[1] == 0)
+ if (cur[1] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if ((cur[1] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xe0) == 0xe0) {
-
- if (cur[2] == 0)
+ if (cur[2] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if ((cur[2] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xf0) == 0xf0) {
- if (cur[3] == 0)
+ if (cur[3] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if (((c & 0xf8) != 0xf0) ||
((cur[3] & 0xc0) != 0x80))
goto encoding_error;
val |= (cur[1] & 0x3f) << 12;
val |= (cur[2] & 0x3f) << 6;
val |= cur[3] & 0x3f;
+ if (val < 0x10000)
+ goto encoding_error;
} else {
/* 3-byte code */
*len = 3;
val = (cur[0] & 0xf) << 12;
val |= (cur[1] & 0x3f) << 6;
val |= cur[2] & 0x3f;
+ if (val < 0x800)
+ goto encoding_error;
}
} else {
/* 2-byte code */
*len = 2;
val = (cur[0] & 0x1f) << 6;
val |= cur[1] & 0x3f;
+ if (val < 0x80)
+ goto encoding_error;
}
if (!IS_CHAR(val)) {
xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
"Char 0x%X out of allowed range\n", val);
- }
+ }
return(val);
} else {
/* 1-byte code */
*len = 1;
+ if (*ctxt->input->cur == 0)
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ if ((*ctxt->input->cur == 0) &&
+ (ctxt->input->end > ctxt->input->cur)) {
+ xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Char 0x0 out of allowed range\n", 0);
+ }
if (*ctxt->input->cur == 0xD) {
if (ctxt->input->cur[1] == 0xA) {
ctxt->nbChars++;
"Input is not proper UTF-8, indicate encoding !\n%s",
BAD_CAST buffer, NULL);
}
- ctxt->charset = XML_CHAR_ENCODING_8859_1;
+ ctxt->charset = XML_CHAR_ENCODING_8859_1;
*len = 1;
return((int) *ctxt->input->cur);
}
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*
* Check for the 0x110000 limit too
*/
* @out: pointer to an array of xmlChar
* @val: the char value
*
- * append the char value in the array
+ * append the char value in the array
*
* Returns the number of xmlChar written
*/
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*/
if (val >= 0x80) {
xmlChar *savedout = out;
* @out: pointer to an array of xmlChar
* @val: the char value
*
- * append the char value in the array
+ * append the char value in the array
*
* Returns the number of xmlChar written
*/
* *
************************************************************************/
+static int
+xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
+ xmlCharEncodingHandlerPtr handler, int len);
+static int
+xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+ xmlCharEncodingHandlerPtr handler, int len);
/**
* xmlSwitchEncoding:
* @ctxt: the parser context
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
{
xmlCharEncodingHandlerPtr handler;
+ int len = -1;
+ int ret;
if (ctxt == NULL) return(-1);
switch (enc) {
(ctxt->input->cur[2] == 0xBF)) {
ctxt->input->cur += 3;
}
- break ;
- default:
- break;
+ len = 90;
+ break;
+ case XML_CHAR_ENCODING_UCS2:
+ len = 90;
+ break;
+ case XML_CHAR_ENCODING_UCS4BE:
+ case XML_CHAR_ENCODING_UCS4LE:
+ case XML_CHAR_ENCODING_UCS4_2143:
+ case XML_CHAR_ENCODING_UCS4_3412:
+ len = 180;
+ break;
+ case XML_CHAR_ENCODING_EBCDIC:
+ case XML_CHAR_ENCODING_8859_1:
+ case XML_CHAR_ENCODING_8859_2:
+ case XML_CHAR_ENCODING_8859_3:
+ case XML_CHAR_ENCODING_8859_4:
+ case XML_CHAR_ENCODING_8859_5:
+ case XML_CHAR_ENCODING_8859_6:
+ case XML_CHAR_ENCODING_8859_7:
+ case XML_CHAR_ENCODING_8859_8:
+ case XML_CHAR_ENCODING_8859_9:
+ case XML_CHAR_ENCODING_ASCII:
+ case XML_CHAR_ENCODING_2022_JP:
+ case XML_CHAR_ENCODING_SHIFT_JIS:
+ case XML_CHAR_ENCODING_EUC_JP:
+ len = 45;
+ break;
}
handler = xmlGetCharEncodingHandler(enc);
if (handler == NULL) {
if (handler == NULL)
return(-1);
ctxt->charset = XML_CHAR_ENCODING_UTF8;
- return(xmlSwitchToEncoding(ctxt, handler));
+ ret = xmlSwitchToEncodingInt(ctxt, handler, len);
+ if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
+ /*
+ * on encoding conversion errors, stop the parser
+ */
+ xmlStopParser(ctxt);
+ ctxt->errNo = XML_I18N_CONV_FAILED;
+ }
+ return(ret);
}
/**
* @ctxt: the parser context
* @input: the input stream
* @handler: the encoding handler
+ * @len: the number of bytes to convert for the first line or -1
*
* change the input functions when discovering the character encoding
* of a given entity.
*
* Returns 0 in case of success, -1 otherwise
*/
-int
-xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
- xmlCharEncodingHandlerPtr handler)
+static int
+xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+ xmlCharEncodingHandlerPtr handler, int len)
{
int nbchars;
/*
* Is there already some content down the pipe to convert ?
*/
- if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
+ if (xmlBufIsEmpty(input->buf->buffer) == 0) {
int processed;
unsigned int use;
/*
- * Specific handling of the Byte Order Mark for
+ * Specific handling of the Byte Order Mark for
* UTF-16
*/
if ((handler->name != NULL) &&
* Move it as the raw buffer and create a new input buffer
*/
processed = input->cur - input->base;
- xmlBufferShrink(input->buf->buffer, processed);
+ xmlBufShrink(input->buf->buffer, processed);
input->buf->raw = input->buf->buffer;
- input->buf->buffer = xmlBufferCreate();
+ input->buf->buffer = xmlBufCreate();
input->buf->rawconsumed = processed;
- use = input->buf->raw->use;
+ use = xmlBufUse(input->buf->raw);
if (ctxt->html) {
/*
* convert as much as possible of the buffer
*/
- nbchars = xmlCharEncInFunc(input->buf->encoder,
- input->buf->buffer,
- input->buf->raw);
+ nbchars = xmlCharEncInput(input->buf, 1);
} else {
/*
* convert just enough to get
* parsed with the autodetected encoding
* into the parser reading buffer.
*/
- nbchars = xmlCharEncFirstLine(input->buf->encoder,
- input->buf->buffer,
- input->buf->raw);
+ nbchars = xmlCharEncFirstLineInput(input->buf, len);
}
if (nbchars < 0) {
xmlErrInternal(ctxt,
NULL);
return (-1);
}
- input->buf->rawconsumed += use - input->buf->raw->use;
- input->base = input->cur = input->buf->buffer->content;
- input->end = &input->base[input->buf->buffer->use];
-
+ input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
+ xmlBufResetInput(input->buf->buffer, input);
}
return (0);
} else if (input->length == 0) {
}
/**
- * xmlSwitchToEncoding:
+ * xmlSwitchInputEncoding:
* @ctxt: the parser context
+ * @input: the input stream
* @handler: the encoding handler
*
* change the input functions when discovering the character encoding
* Returns 0 in case of success, -1 otherwise
*/
int
-xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
-{
+xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+ xmlCharEncodingHandlerPtr handler) {
+ return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
+}
+
+/**
+ * xmlSwitchToEncodingInt:
+ * @ctxt: the parser context
+ * @handler: the encoding handler
+ * @len: the length to convert or -1
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity, and convert only @len bytes of the output, this
+ * is needed on auto detect to allows any declared encoding later to
+ * convert the actual content after the xmlDecl
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+static int
+xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
+ xmlCharEncodingHandlerPtr handler, int len) {
int ret = 0;
if (handler != NULL) {
if (ctxt->input != NULL) {
- ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
+ ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
} else {
xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
NULL);
* The parsing is now done in UTF8 natively
*/
ctxt->charset = XML_CHAR_ENCODING_UTF8;
- } else
+ } else
return(-1);
return(ret);
}
+/**
+ * xmlSwitchToEncoding:
+ * @ctxt: the parser context
+ * @handler: the encoding handler
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+int
+xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
+{
+ return (xmlSwitchToEncodingInt(ctxt, handler, -1));
+}
+
/************************************************************************
* *
* Commodity functions to handle entities processing *
if (input->version != NULL) xmlFree((char *) input->version);
if ((input->free != NULL) && (input->base != NULL))
input->free((xmlChar *) input->base);
- if (input->buf != NULL)
+ if (input->buf != NULL)
xmlFreeParserInputBuffer(input->buf);
xmlFree(input);
}
* xmlNewInputStream:
* @ctxt: an XML parser context
*
- * Create a new input stream structure
+ * Create a new input stream structure.
+ *
* Returns the new input stream or NULL
*/
xmlParserInputPtr
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
xmlParserInputPtr input;
- static int id = 0;
input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
if (input == NULL) {
input->line = 1;
input->col = 1;
input->standalone = -1;
+
/*
- * we don't care about thread reentrancy unicity for a single
- * parser context (and hence thread) is sufficient.
+ * If the context is NULL the id cannot be initialized, but that
+ * should not happen while parsing which is the situation where
+ * the id is actually needed.
*/
- input->id = id++;
+ if (ctxt != NULL)
+ input->id = ctxt->input_id++;
+
return(input);
}
}
inputStream->filename = NULL;
inputStream->buf = input;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
+ xmlBufResetInput(inputStream->buf->buffer, inputStream);
+
if (enc != XML_CHAR_ENCODING_NONE) {
xmlSwitchEncoding(ctxt, enc);
}
if (input == NULL) {
return(NULL);
}
- input->filename = (char *) entity->URI;
+ if (entity->URI != NULL)
+ input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
input->base = entity->content;
+ if (entity->length == 0)
+ entity->length = xmlStrlen(entity->content);
input->cur = entity->content;
input->length = entity->length;
input->end = &entity->content[input->length];
inputStream = xmlCheckHTTPInput(ctxt, inputStream);
if (inputStream == NULL)
return(NULL);
-
+
if (inputStream->filename == NULL)
URI = xmlStrdup((xmlChar *) filename);
else
if (URI != NULL) xmlFree((char *) URI);
inputStream->directory = directory;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
+ xmlBufResetInput(inputStream->buf->buffer, inputStream);
if ((ctxt->directory == NULL) && (directory != NULL))
ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
return(inputStream);
xmlErrMemory(NULL, "cannot initialize parser context\n");
return(-1);
}
+ xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
+
if (ctxt->sax == NULL)
ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
if (ctxt->sax == NULL) {
ctxt->nsWellFormed = 1;
ctxt->valid = 1;
ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
+ if (ctxt->loadsubset) {
+ ctxt->options |= XML_PARSE_DTDLOAD;
+ }
ctxt->validate = xmlDoValidityCheckingDefaultValue;
ctxt->pedantic = xmlPedanticParserDefaultValue;
+ if (ctxt->pedantic) {
+ ctxt->options |= XML_PARSE_PEDANTIC;
+ }
ctxt->linenumbers = xmlLineNumbersDefaultValue;
ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
- if (ctxt->keepBlanks == 0)
+ if (ctxt->keepBlanks == 0) {
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
+ ctxt->options |= XML_PARSE_NOBLANKS;
+ }
ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
ctxt->vctxt.userData = ctxt;
else
ctxt->vctxt.warning = xmlParserValidityWarning;
ctxt->vctxt.nodeMax = 0;
+ ctxt->options |= XML_PARSE_DTDVALID;
}
ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
+ if (ctxt->replaceEntities) {
+ ctxt->options |= XML_PARSE_NOENT;
+ }
ctxt->record_info = 0;
ctxt->nbChars = 0;
ctxt->checkIndex = 0;
ctxt->depth = 0;
ctxt->charset = XML_CHAR_ENCODING_UTF8;
ctxt->catalogs = NULL;
+ ctxt->nbentities = 0;
+ ctxt->sizeentities = 0;
+ ctxt->sizeentcopy = 0;
+ ctxt->input_id = 1;
xmlInitNodeInfoSeq(&ctxt->node_seq);
return(0);
}
if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
+ if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
- if (ctxt->attsDefault != NULL)
+ if (ctxt->attsDefault != NULL)
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
if (ctxt->attsSpecial != NULL)
xmlHashFree(ctxt->attsSpecial, NULL);
* @node: an XML node within the tree
*
* Find the parser node info struct for a given node
- *
+ *
* Returns an xmlParserNodeInfo block pointer or NULL
*/
const xmlParserNodeInfo *
* @seq: a node info sequence pointer
* @node: an XML node pointer
*
- *
+ *
* xmlParserFindNodeInfoIndex : Find the index that the info record for
* the given node is or should be at in a sorted sequence
*
pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
info->node);
- if ((pos < ctxt->node_seq.length) &&
+ if ((pos < ctxt->node_seq.length) &&
(ctxt->node_seq.buffer != NULL) &&
(ctxt->node_seq.buffer[pos].node == info->node)) {
ctxt->node_seq.buffer[pos] = *info;
/* Otherwise, we need to add new node to buffer */
else {
- if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
+ if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
+ (ctxt->node_seq.buffer == NULL)) {
xmlParserNodeInfo *tmp_buffer;
unsigned int byte_size;
************************************************************************/
/**
* xmlPedanticParserDefault:
- * @val: int 0 or 1
+ * @val: int 0 or 1
*
* Set and return the previous value for enabling pedantic warnings.
*
/**
* xmlLineNumbersDefault:
- * @val: int 0 or 1
+ * @val: int 0 or 1
*
* Set and return the previous value for enabling line numbers in elements
* contents. This may break on old application and is turned off by default.
/**
* xmlSubstituteEntitiesDefault:
- * @val: int 0 or 1
+ * @val: int 0 or 1
*
* Set and return the previous value for default entity support.
* Initially the parser always keep entity references instead of substituting
/**
* xmlKeepBlanksDefault:
- * @val: int 0 or 1
+ * @val: int 0 or 1
*
* Set and return the previous value for default blanks text nodes support.
* The 1.x version of the parser used an heuristic to try to detect
* ignorableWhitespace() are only generated when running the parser in
* validating mode and when the current element doesn't allow CDATA or
* mixed content.
- * This function is provided as a way to force the standard behavior
+ * This function is provided as a way to force the standard behavior
* on 1.X libs and to switch back to the old mode for compatibility when
* running 1.X client code on 2.X . Upgrade of 1.X code should be done
* by using xmlIsBlankNode() commodity function to detect the "empty"
int old = xmlKeepBlanksDefaultValue;
xmlKeepBlanksDefaultValue = val;
- xmlIndentTreeOutput = !val;
+ if (!val) xmlIndentTreeOutput = 1;
return(old);
}