X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=parserInternals.c;h=bfc778ac1f778c5dab5b5fcd91dbe994e8f8d58c;hb=f86ba34b57d1a307688402b495d8b0532770d98d;hp=f8a70410680e04981cbb07e319cbbd4c33956a71;hpb=4e8fa1efca3c71ef7e0c515afb175e849a97d416;p=platform%2Fupstream%2Flibxml2.git diff --git a/parserInternals.c b/parserInternals.c index f8a7041..bfc778a 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -55,6 +55,10 @@ #include #include +#define CUR(ctxt) ctxt->input->cur +#define END(ctxt) ctxt->input->end +#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt)) + #include "buf.h" #include "enc.h" @@ -165,7 +169,7 @@ __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, * * Handle an internal error */ -static void +static void LIBXML_ATTR_FORMAT(2,0) xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) { if ((ctxt != NULL) && (ctxt->disableSAX != 0) && @@ -193,7 +197,7 @@ xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) * * n encoding error */ -static void +static void LIBXML_ATTR_FORMAT(3,0) xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, int val) { @@ -294,7 +298,7 @@ xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUS */ int xmlParserInputGrow(xmlParserInputPtr in, int len) { - size_t ret; + int ret; size_t indx; const xmlChar *content; @@ -422,103 +426,105 @@ xmlNextChar(xmlParserCtxtPtr ctxt) (ctxt->input == NULL)) return; - if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { - if ((*ctxt->input->cur == 0) && - (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && - (ctxt->instate != XML_PARSER_COMMENT)) { - /* - * If we are at the end of the current entity and - * the context allows it, we pop consumed entities - * automatically. - * the auto closing should be blocked in other cases - */ + if (!(VALID_CTXT(ctxt))) { + xmlErrInternal(ctxt, "Parser input data memory error\n", NULL); + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + xmlStopParser(ctxt); + return; + } + + if ((*ctxt->input->cur == 0) && + (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { + if ((ctxt->instate != XML_PARSER_COMMENT)) xmlPopInput(ctxt); - } else { - const unsigned char *cur; - unsigned char c; + return; + } - /* - * 2.11 End-of-Line Handling - * the literal two-character sequence "#xD#xA" or a standalone - * literal #xD, an XML processor must pass to the application - * the single character #xA. - */ - if (*(ctxt->input->cur) == '\n') { - ctxt->input->line++; ctxt->input->col = 1; - } else - ctxt->input->col++; + if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { + const unsigned char *cur; + unsigned char c; - /* - * We are supposed to handle UTF8, check it's valid - * From rfc2044: encoding of the Unicode values on UTF-8: - * - * UCS-4 range (hex.) UTF-8 octet sequence (binary) - * 0000 0000-0000 007F 0xxxxxxx - * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx - * - * Check for the 0x110000 limit too - */ - cur = ctxt->input->cur; + /* + * 2.11 End-of-Line Handling + * the literal two-character sequence "#xD#xA" or a standalone + * literal #xD, an XML processor must pass to the application + * the single character #xA. + */ + if (*(ctxt->input->cur) == '\n') { + ctxt->input->line++; ctxt->input->col = 1; + } else + ctxt->input->col++; - c = *cur; - if (c & 0x80) { - if (c == 0xC0) - goto encoding_error; - if (cur[1] == 0) { + /* + * We are supposed to handle UTF8, check it's valid + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * Check for the 0x110000 limit too + */ + cur = ctxt->input->cur; + + c = *cur; + if (c & 0x80) { + if (c == 0xC0) + goto encoding_error; + if (cur[1] == 0) { + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } + if ((cur[1] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xe0) == 0xe0) { + unsigned int val; + + if (cur[2] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); cur = ctxt->input->cur; } - if ((cur[1] & 0xc0) != 0x80) + if ((cur[2] & 0xc0) != 0x80) goto encoding_error; - if ((c & 0xe0) == 0xe0) { - unsigned int val; - - if (cur[2] == 0) { + if ((c & 0xf0) == 0xf0) { + if (cur[3] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); cur = ctxt->input->cur; } - if ((cur[2] & 0xc0) != 0x80) + if (((c & 0xf8) != 0xf0) || + ((cur[3] & 0xc0) != 0x80)) goto encoding_error; - if ((c & 0xf0) == 0xf0) { - if (cur[3] == 0) { - xmlParserInputGrow(ctxt->input, INPUT_CHUNK); - cur = ctxt->input->cur; - } - if (((c & 0xf8) != 0xf0) || - ((cur[3] & 0xc0) != 0x80)) - goto encoding_error; - /* 4-byte code */ - ctxt->input->cur += 4; - val = (cur[0] & 0x7) << 18; - val |= (cur[1] & 0x3f) << 12; - val |= (cur[2] & 0x3f) << 6; - val |= cur[3] & 0x3f; - } else { - /* 3-byte code */ - ctxt->input->cur += 3; - val = (cur[0] & 0xf) << 12; - val |= (cur[1] & 0x3f) << 6; - val |= cur[2] & 0x3f; - } - if (((val > 0xd7ff) && (val < 0xe000)) || - ((val > 0xfffd) && (val < 0x10000)) || - (val >= 0x110000)) { - xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, - "Char 0x%X out of allowed range\n", - val); - } - } else - /* 2-byte code */ - ctxt->input->cur += 2; + /* 4-byte code */ + ctxt->input->cur += 4; + val = (cur[0] & 0x7) << 18; + val |= (cur[1] & 0x3f) << 12; + val |= (cur[2] & 0x3f) << 6; + val |= cur[3] & 0x3f; + } else { + /* 3-byte code */ + ctxt->input->cur += 3; + val = (cur[0] & 0xf) << 12; + val |= (cur[1] & 0x3f) << 6; + val |= cur[2] & 0x3f; + } + if (((val > 0xd7ff) && (val < 0xe000)) || + ((val > 0xfffd) && (val < 0x10000)) || + (val >= 0x110000)) { + xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, + "Char 0x%X out of allowed range\n", + val); + } } else - /* 1-byte code */ - ctxt->input->cur++; + /* 2-byte code */ + ctxt->input->cur += 2; + } else + /* 1-byte code */ + ctxt->input->cur++; - ctxt->nbChars++; - if (*ctxt->input->cur == 0) - xmlParserInputGrow(ctxt->input, INPUT_CHUNK); - } + ctxt->nbChars++; + if (*ctxt->input->cur == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); } else { /* * Assume it's a fixed length encoding (1) with @@ -937,6 +943,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) { xmlCharEncodingHandlerPtr handler; int len = -1; + int ret; if (ctxt == NULL) return(-1); switch (enc) { @@ -1097,7 +1104,15 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) if (handler == NULL) return(-1); ctxt->charset = XML_CHAR_ENCODING_UTF8; - return(xmlSwitchToEncodingInt(ctxt, handler, len)); + ret = xmlSwitchToEncodingInt(ctxt, handler, len); + if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) { + /* + * on encoding conversion errors, stop the parser + */ + xmlStopParser(ctxt); + ctxt->errNo = XML_I18N_CONV_FAILED; + } + return(ret); } /** @@ -1450,6 +1465,8 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { if (entity->URI != NULL) input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); input->base = entity->content; + if (entity->length == 0) + entity->length = xmlStrlen(entity->content); input->cur = entity->content; input->length = entity->length; input->end = &entity->content[input->length]; @@ -1691,12 +1708,20 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) ctxt->nsWellFormed = 1; ctxt->valid = 1; ctxt->loadsubset = xmlLoadExtDtdDefaultValue; + if (ctxt->loadsubset) { + ctxt->options |= XML_PARSE_DTDLOAD; + } ctxt->validate = xmlDoValidityCheckingDefaultValue; ctxt->pedantic = xmlPedanticParserDefaultValue; + if (ctxt->pedantic) { + ctxt->options |= XML_PARSE_PEDANTIC; + } ctxt->linenumbers = xmlLineNumbersDefaultValue; ctxt->keepBlanks = xmlKeepBlanksDefaultValue; - if (ctxt->keepBlanks == 0) + if (ctxt->keepBlanks == 0) { ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; + ctxt->options |= XML_PARSE_NOBLANKS; + } ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; ctxt->vctxt.userData = ctxt; @@ -1708,8 +1733,12 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) else ctxt->vctxt.warning = xmlParserValidityWarning; ctxt->vctxt.nodeMax = 0; + ctxt->options |= XML_PARSE_DTDVALID; } ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; + if (ctxt->replaceEntities) { + ctxt->options |= XML_PARSE_NOENT; + } ctxt->record_info = 0; ctxt->nbChars = 0; ctxt->checkIndex = 0; @@ -1990,7 +2019,8 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, /* Otherwise, we need to add new node to buffer */ else { - if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { + if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) || + (ctxt->node_seq.buffer == NULL)) { xmlParserNodeInfo *tmp_buffer; unsigned int byte_size;