rolled back to 2.9.2 because 2.9.4 doesn't work with XML validator
[platform/upstream/libxml2.git] / parserInternals.c
index bfc778a..df204fd 100644 (file)
 #include <libxml/globals.h>
 #include <libxml/chvalid.h>
 
-#define CUR(ctxt) ctxt->input->cur
-#define END(ctxt) ctxt->input->end
-#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
-
 #include "buf.h"
 #include "enc.h"
 
@@ -169,7 +165,7 @@ __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
  *
  * Handle an internal error
  */
-static void LIBXML_ATTR_FORMAT(2,0)
+static void
 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
 {
     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
@@ -197,7 +193,7 @@ xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
  *
  * n encoding error
  */
-static void LIBXML_ATTR_FORMAT(3,0)
+static void
 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
                   const char *msg, int val)
 {
@@ -298,7 +294,7 @@ xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUS
  */
 int
 xmlParserInputGrow(xmlParserInputPtr in, int len) {
-    int ret;
+    size_t ret;
     size_t indx;
     const xmlChar *content;
 
@@ -426,105 +422,103 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
         (ctxt->input == NULL))
         return;
 
-    if (!(VALID_CTXT(ctxt))) {
-        xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
-       ctxt->errNo = XML_ERR_INTERNAL_ERROR;
-        xmlStopParser(ctxt);
-       return;
-    }
-
-    if ((*ctxt->input->cur == 0) &&
-        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
-        if ((ctxt->instate != XML_PARSER_COMMENT))
-            xmlPopInput(ctxt);
-        return;
-    }
-
     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
-        const unsigned char *cur;
-        unsigned char c;
-
-        /*
-         *   2.11 End-of-Line Handling
-         *   the literal two-character sequence "#xD#xA" or a standalone
-         *   literal #xD, an XML processor must pass to the application
-         *   the single character #xA.
-         */
-        if (*(ctxt->input->cur) == '\n') {
-            ctxt->input->line++; ctxt->input->col = 1;
-        } else
-            ctxt->input->col++;
+        if ((*ctxt->input->cur == 0) &&
+            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
+            (ctxt->instate != XML_PARSER_COMMENT)) {
+            /*
+             * If we are at the end of the current entity and
+             * the context allows it, we pop consumed entities
+             * automatically.
+             * the auto closing should be blocked in other cases
+             */
+            xmlPopInput(ctxt);
+        } else {
+            const unsigned char *cur;
+            unsigned char c;
 
-        /*
-         * We are supposed to handle UTF8, check it's valid
-         * From rfc2044: encoding of the Unicode values on UTF-8:
-         *
-         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
-         * 0000 0000-0000 007F   0xxxxxxx
-         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
-         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
-         *
-         * Check for the 0x110000 limit too
-         */
-        cur = ctxt->input->cur;
+            /*
+             *   2.11 End-of-Line Handling
+             *   the literal two-character sequence "#xD#xA" or a standalone
+             *   literal #xD, an XML processor must pass to the application
+             *   the single character #xA.
+             */
+            if (*(ctxt->input->cur) == '\n') {
+                ctxt->input->line++; ctxt->input->col = 1;
+            } else
+                ctxt->input->col++;
 
-        c = *cur;
-        if (c & 0x80) {
-        if (c == 0xC0)
-           goto encoding_error;
-            if (cur[1] == 0) {
-                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
-                cur = ctxt->input->cur;
-            }
-            if ((cur[1] & 0xc0) != 0x80)
-                goto encoding_error;
-            if ((c & 0xe0) == 0xe0) {
-                unsigned int val;
+            /*
+             * We are supposed to handle UTF8, check it's valid
+             * From rfc2044: encoding of the Unicode values on UTF-8:
+             *
+             * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
+             * 0000 0000-0000 007F   0xxxxxxx
+             * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
+             * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
+             *
+             * Check for the 0x110000 limit too
+             */
+            cur = ctxt->input->cur;
 
-                if (cur[2] == 0) {
+            c = *cur;
+            if (c & 0x80) {
+               if (c == 0xC0)
+                   goto encoding_error;
+                if (cur[1] == 0) {
                     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                     cur = ctxt->input->cur;
                 }
-                if ((cur[2] & 0xc0) != 0x80)
+                if ((cur[1] & 0xc0) != 0x80)
                     goto encoding_error;
-                if ((c & 0xf0) == 0xf0) {
-                    if (cur[3] == 0) {
+                if ((c & 0xe0) == 0xe0) {
+                    unsigned int val;
+
+                    if (cur[2] == 0) {
                         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                         cur = ctxt->input->cur;
                     }
-                    if (((c & 0xf8) != 0xf0) ||
-                        ((cur[3] & 0xc0) != 0x80))
+                    if ((cur[2] & 0xc0) != 0x80)
                         goto encoding_error;
-                    /* 4-byte code */
-                    ctxt->input->cur += 4;
-                    val = (cur[0] & 0x7) << 18;
-                    val |= (cur[1] & 0x3f) << 12;
-                    val |= (cur[2] & 0x3f) << 6;
-                    val |= cur[3] & 0x3f;
-                } else {
-                    /* 3-byte code */
-                    ctxt->input->cur += 3;
-                    val = (cur[0] & 0xf) << 12;
-                    val |= (cur[1] & 0x3f) << 6;
-                    val |= cur[2] & 0x3f;
-                }
-                if (((val > 0xd7ff) && (val < 0xe000)) ||
-                    ((val > 0xfffd) && (val < 0x10000)) ||
-                    (val >= 0x110000)) {
-               xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
-                                 "Char 0x%X out of allowed range\n",
-                                 val);
-                }
+                    if ((c & 0xf0) == 0xf0) {
+                        if (cur[3] == 0) {
+                            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                            cur = ctxt->input->cur;
+                        }
+                        if (((c & 0xf8) != 0xf0) ||
+                            ((cur[3] & 0xc0) != 0x80))
+                            goto encoding_error;
+                        /* 4-byte code */
+                        ctxt->input->cur += 4;
+                        val = (cur[0] & 0x7) << 18;
+                        val |= (cur[1] & 0x3f) << 12;
+                        val |= (cur[2] & 0x3f) << 6;
+                        val |= cur[3] & 0x3f;
+                    } else {
+                        /* 3-byte code */
+                        ctxt->input->cur += 3;
+                        val = (cur[0] & 0xf) << 12;
+                        val |= (cur[1] & 0x3f) << 6;
+                        val |= cur[2] & 0x3f;
+                    }
+                    if (((val > 0xd7ff) && (val < 0xe000)) ||
+                        ((val > 0xfffd) && (val < 0x10000)) ||
+                        (val >= 0x110000)) {
+                       xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
+                                         "Char 0x%X out of allowed range\n",
+                                         val);
+                    }
+                } else
+                    /* 2-byte code */
+                    ctxt->input->cur += 2;
             } else
-                /* 2-byte code */
-                ctxt->input->cur += 2;
-        } else
-            /* 1-byte code */
-            ctxt->input->cur++;
+                /* 1-byte code */
+                ctxt->input->cur++;
 
-        ctxt->nbChars++;
-        if (*ctxt->input->cur == 0)
-            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+            ctxt->nbChars++;
+            if (*ctxt->input->cur == 0)
+                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+        }
     } else {
         /*
          * Assume it's a fixed length encoding (1) with
@@ -943,7 +937,6 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
 {
     xmlCharEncodingHandlerPtr handler;
     int len = -1;
-    int ret;
 
     if (ctxt == NULL) return(-1);
     switch (enc) {
@@ -1104,15 +1097,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
     if (handler == NULL)
        return(-1);
     ctxt->charset = XML_CHAR_ENCODING_UTF8;
-    ret = xmlSwitchToEncodingInt(ctxt, handler, len);
-    if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
-        /*
-        * on encoding conversion errors, stop the parser
-        */
-        xmlStopParser(ctxt);
-       ctxt->errNo = XML_I18N_CONV_FAILED;
-    }
-    return(ret);
+    return(xmlSwitchToEncodingInt(ctxt, handler, len));
 }
 
 /**
@@ -1465,8 +1450,6 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
     if (entity->URI != NULL)
        input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
     input->base = entity->content;
-    if (entity->length == 0)
-        entity->length = xmlStrlen(entity->content);
     input->cur = entity->content;
     input->length = entity->length;
     input->end = &entity->content[input->length];