Imported Upstream version 0.19.7

[platform/upstream/gettext.git] / gettext-tools / gnulib-lib / libxml / parserInternals.c
diff --git a/gettext-tools/gnulib-lib/libxml/parserInternals.c b/gettext-tools/gnulib-lib/libxml/parserInternals.c

index f4da58d..2b8646c 100644 (file)
--- a/gettext-tools/gnulib-lib/libxml/parserInternals.c
+++ b/gettext-tools/gnulib-lib/libxml/parserInternals.c
@@ -55,6 +55,9 @@
  #include <libxml/globals.h>
  #include <libxml/chvalid.h>
  
+#include "buf.h"
+#include "enc.h"
+
  /*
   * Various global defaults for parsing
   */
@@ -73,15 +76,15 @@ xmlCheckVersion(int version) {
      xmlInitParser();
  
      if ((myversion / 10000) != (version / 10000)) {
-       xmlGenericError(xmlGenericErrorContext, 
+       xmlGenericError(xmlGenericErrorContext,
                 "Fatal: program compiled against libxml %d using libxml %d\n",
                 (version / 10000), (myversion / 10000));
-       fprintf(stderr, 
+       fprintf(stderr,
                 "Fatal: program compiled against libxml %d using libxml %d\n",
                 (version / 10000), (myversion / 10000));
      }
      if ((myversion / 100) < (version / 100)) {
-       xmlGenericError(xmlGenericErrorContext, 
+       xmlGenericError(xmlGenericErrorContext,
                 "Warning: program compiled against libxml %d using older %d\n",
                 (version / 100), (myversion / 100));
      }
@@ -90,7 +93,7 @@ xmlCheckVersion(int version) {
  
  /************************************************************************
   *                                                                     *
- *             Some factorized error routines                          *
+ *             Some factorized error routines                          *
   *                                                                     *
   ************************************************************************/
  
@@ -225,7 +228,7 @@ xmlIsLetter(int c) {
  
  /************************************************************************
   *                                                                     *
- *             Input handling functions for progressive parsing        *
+ *             Input handling functions for progressive parsing        *
   *                                                                     *
   ************************************************************************/
  
@@ -242,7 +245,7 @@ xmlIsLetter(int c) {
  
  static
  void check_buffer(xmlParserInputPtr in) {
-    if (in->base != in->buf->buffer->content) {
+    if (in->base != xmlBufContent(in->buf->buffer)) {
          xmlGenericError(xmlGenericErrorContext,
                 "xmlParserInput: base mismatch problem\n");
      }
@@ -250,17 +253,17 @@ void check_buffer(xmlParserInputPtr in) {
          xmlGenericError(xmlGenericErrorContext,
                 "xmlParserInput: cur < base problem\n");
      }
-    if (in->cur > in->base + in->buf->buffer->use) {
+    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
          xmlGenericError(xmlGenericErrorContext,
                 "xmlParserInput: cur > base + use problem\n");
      }
-    xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
-            (int) in, (int) in->buf->buffer->content, in->cur - in->base,
-           in->buf->buffer->use, in->buf->buffer->size);
+    xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n",
+            (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base,
+           xmlBufUse(in->buf->buffer));
  }
  
  #else
-#define CHECK_BUFFER(in) 
+#define CHECK_BUFFER(in)
  #endif
  
  
@@ -269,50 +272,13 @@ void check_buffer(xmlParserInputPtr in) {
   * @in:  an XML parser input
   * @len:  an indicative size for the lookahead
   *
- * This function refresh the input for the parser. It doesn't try to
- * preserve pointers to the input buffer, and discard already read data
+ * This function was internal and is deprecated.
   *
- * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
- * end of this entity
+ * Returns -1, as calling this function is always an error.
   */
  int
-xmlParserInputRead(xmlParserInputPtr in, int len) {
-    int ret;
-    int used;
-    int indx;
-
-    if (in == NULL) return(-1);
-#ifdef DEBUG_INPUT
-    xmlGenericError(xmlGenericErrorContext, "Read\n");
-#endif
-    if (in->buf == NULL) return(-1);
-    if (in->base == NULL) return(-1);
-    if (in->cur == NULL) return(-1);
-    if (in->buf->buffer == NULL) return(-1);
-    if (in->buf->readcallback == NULL) return(-1);
-
-    CHECK_BUFFER(in);
-
-    used = in->cur - in->buf->buffer->content;
-    ret = xmlBufferShrink(in->buf->buffer, used);
-    if (ret > 0) {
-       in->cur -= ret;
-       in->consumed += ret;
-    }
-    ret = xmlParserInputBufferRead(in->buf, len);
-    if (in->base != in->buf->buffer->content) {
-        /*
-        * the buffer has been reallocated
-        */
-       indx = in->cur - in->base;
-       in->base = in->buf->buffer->content;
-       in->cur = &in->buf->buffer->content[indx];
-    }
-    in->end = &in->buf->buffer->content[in->buf->buffer->use];
-
-    CHECK_BUFFER(in);
-
-    return(ret);
+xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
+    return(-1);
  }
  
  /**
@@ -323,15 +289,16 @@ xmlParserInputRead(xmlParserInputPtr in, int len) {
   * This function increase the input for the parser. It tries to
   * preserve pointers to the input buffer, and keep already read data
   *
- * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
+ * Returns the number of chars read, or -1 in case of error; 0 indicates the
   * end of this entity
   */
  int
  xmlParserInputGrow(xmlParserInputPtr in, int len) {
-    int ret;
-    int indx;
+    size_t ret;
+    size_t indx;
+    const xmlChar *content;
  
-    if (in == NULL) return(-1);
+    if ((in == NULL) || (len < 0)) return(-1);
  #ifdef DEBUG_INPUT
      xmlGenericError(xmlGenericErrorContext, "Grow\n");
  #endif
@@ -343,15 +310,15 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
      CHECK_BUFFER(in);
  
      indx = in->cur - in->base;
-    if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
+    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
  
         CHECK_BUFFER(in);
  
          return(0);
      }
-    if (in->buf->readcallback != NULL)
+    if (in->buf->readcallback != NULL) {
         ret = xmlParserInputBufferGrow(in->buf, len);
-    else       
+    } else
          return(0);
  
      /*
@@ -360,15 +327,17 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
       *        pointer arithmetic. Insure will raise it as a bug but in
       *        that specific case, that's not !
       */
-    if (in->base != in->buf->buffer->content) {
+
+    content = xmlBufContent(in->buf->buffer);
+    if (in->base != content) {
          /*
          * the buffer has been reallocated
          */
         indx = in->cur - in->base;
-       in->base = in->buf->buffer->content;
-       in->cur = &in->buf->buffer->content[indx];
+       in->base = content;
+       in->cur = &content[indx];
      }
-    in->end = &in->buf->buffer->content[in->buf->buffer->use];
+    in->end = xmlBufEnd(in->buf->buffer);
  
      CHECK_BUFFER(in);
  
@@ -383,9 +352,10 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
   */
  void
  xmlParserInputShrink(xmlParserInputPtr in) {
-    int used;
-    int ret;
-    int indx;
+    size_t used;
+    size_t ret;
+    size_t indx;
+    const xmlChar *content;
  
  #ifdef DEBUG_INPUT
      xmlGenericError(xmlGenericErrorContext, "Shrink\n");
@@ -398,42 +368,43 @@ xmlParserInputShrink(xmlParserInputPtr in) {
  
      CHECK_BUFFER(in);
  
-    used = in->cur - in->buf->buffer->content;
+    used = in->cur - xmlBufContent(in->buf->buffer);
      /*
       * Do not shrink on large buffers whose only a tiny fraction
       * was consumed
       */
      if (used > INPUT_CHUNK) {
-       ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
+       ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
         if (ret > 0) {
             in->cur -= ret;
             in->consumed += ret;
         }
-       in->end = &in->buf->buffer->content[in->buf->buffer->use];
+       in->end = xmlBufEnd(in->buf->buffer);
      }
  
      CHECK_BUFFER(in);
  
-    if (in->buf->buffer->use > INPUT_CHUNK) {
+    if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
          return;
      }
      xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
-    if (in->base != in->buf->buffer->content) {
+    content = xmlBufContent(in->buf->buffer);
+    if (in->base != content) {
          /*
          * the buffer has been reallocated
          */
         indx = in->cur - in->base;
-       in->base = in->buf->buffer->content;
-       in->cur = &in->buf->buffer->content[indx];
+       in->base = content;
+       in->cur = &content[indx];
      }
-    in->end = &in->buf->buffer->content[in->buf->buffer->use];
+    in->end = xmlBufEnd(in->buf->buffer);
  
      CHECK_BUFFER(in);
  }
  
  /************************************************************************
   *                                                                     *
- *             UTF8 character input and related functions              *
+ *             UTF8 character input and related functions              *
   *                                                                     *
   ************************************************************************/
  
@@ -484,7 +455,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
               * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
               * 0000 0000-0000 007F   0xxxxxxx
               * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
-             * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
+             * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
               *
               * Check for the 0x110000 limit too
               */
@@ -494,20 +465,26 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
              if (c & 0x80) {
                 if (c == 0xC0)
                     goto encoding_error;
-                if (cur[1] == 0)
+                if (cur[1] == 0) {
                      xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                    cur = ctxt->input->cur;
+                }
                  if ((cur[1] & 0xc0) != 0x80)
                      goto encoding_error;
                  if ((c & 0xe0) == 0xe0) {
                      unsigned int val;
  
-                    if (cur[2] == 0)
+                    if (cur[2] == 0) {
                          xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                        cur = ctxt->input->cur;
+                    }
                      if ((cur[2] & 0xc0) != 0x80)
                          goto encoding_error;
                      if ((c & 0xf0) == 0xf0) {
-                        if (cur[3] == 0)
+                        if (cur[3] == 0) {
                              xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                            cur = ctxt->input->cur;
+                        }
                          if (((c & 0xf8) != 0xf0) ||
                              ((cur[3] & 0xc0) != 0x80))
                              goto encoding_error;
@@ -628,7 +605,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
          * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
          * 0000 0000-0000 007F   0xxxxxxx
          * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
-        * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
+        * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
          *
          * Check for the 0x110000 limit too
          */
@@ -638,21 +615,26 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
  
         c = *cur;
         if (c & 0x80) {
-           if (c == 0xC0)
+           if (((c & 0x40) == 0) || (c == 0xC0))
                 goto encoding_error;
-           if (cur[1] == 0)
+           if (cur[1] == 0) {
                 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                cur = ctxt->input->cur;
+            }
             if ((cur[1] & 0xc0) != 0x80)
                 goto encoding_error;
             if ((c & 0xe0) == 0xe0) {
-
-               if (cur[2] == 0)
+               if (cur[2] == 0) {
                     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                    cur = ctxt->input->cur;
+                }
                 if ((cur[2] & 0xc0) != 0x80)
                     goto encoding_error;
                 if ((c & 0xf0) == 0xf0) {
-                   if (cur[3] == 0)
+                   if (cur[3] == 0) {
                         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                        cur = ctxt->input->cur;
+                    }
                     if (((c & 0xf8) != 0xf0) ||
                         ((cur[3] & 0xc0) != 0x80))
                         goto encoding_error;
@@ -662,27 +644,40 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
                     val |= (cur[1] & 0x3f) << 12;
                     val |= (cur[2] & 0x3f) << 6;
                     val |= cur[3] & 0x3f;
+                   if (val < 0x10000)
+                       goto encoding_error;
                 } else {
                   /* 3-byte code */
                     *len = 3;
                     val = (cur[0] & 0xf) << 12;
                     val |= (cur[1] & 0x3f) << 6;
                     val |= cur[2] & 0x3f;
+                   if (val < 0x800)
+                       goto encoding_error;
                 }
             } else {
               /* 2-byte code */
                 *len = 2;
                 val = (cur[0] & 0x1f) << 6;
                 val |= cur[1] & 0x3f;
+               if (val < 0x80)
+                   goto encoding_error;
             }
             if (!IS_CHAR(val)) {
                 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                   "Char 0x%X out of allowed range\n", val);
-           }    
+           }
             return(val);
         } else {
             /* 1-byte code */
             *len = 1;
+           if (*ctxt->input->cur == 0)
+               xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+           if ((*ctxt->input->cur == 0) &&
+               (ctxt->input->end > ctxt->input->cur)) {
+               xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
+                                 "Char 0x0 out of allowed range\n", 0);
+           }
             if (*ctxt->input->cur == 0xD) {
                 if (ctxt->input->cur[1] == 0xA) {
                     ctxt->nbChars++;
@@ -735,7 +730,7 @@ encoding_error:
                      "Input is not proper UTF-8, indicate encoding !\n%s",
                      BAD_CAST buffer, NULL);
      }
-    ctxt->charset = XML_CHAR_ENCODING_8859_1; 
+    ctxt->charset = XML_CHAR_ENCODING_8859_1;
      *len = 1;
      return((int) *ctxt->input->cur);
  }
@@ -764,7 +759,7 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
           * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
           * 0000 0000-0000 007F   0xxxxxxx
           * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
-         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
+         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
           *
           * Check for the 0x110000 limit too
           */
@@ -857,7 +852,7 @@ encoding_error:
   * @out:  pointer to an array of xmlChar
   * @val:  the char value
   *
- * append the char value in the array 
+ * append the char value in the array
   *
   * Returns the number of xmlChar written
   */
@@ -871,7 +866,7 @@ xmlCopyCharMultiByte(xmlChar *out, int val) {
       * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
       * 0000 0000-0000 007F   0xxxxxxx
       * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
-     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
+     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
       */
      if  (val >= 0x80) {
         xmlChar *savedout = out;
@@ -899,7 +894,7 @@ xmlCopyCharMultiByte(xmlChar *out, int val) {
   * @out:  pointer to an array of xmlChar
   * @val:  the char value
   *
- * append the char value in the array 
+ * append the char value in the array
   *
   * Returns the number of xmlChar written
   */
@@ -921,6 +916,12 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
   *                                                                     *
   ************************************************************************/
  
+static int
+xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
+                       xmlCharEncodingHandlerPtr handler, int len);
+static int
+xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+                          xmlCharEncodingHandlerPtr handler, int len);
  /**
   * xmlSwitchEncoding:
   * @ctxt:  the parser context
@@ -935,6 +936,8 @@ int
  xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
  {
      xmlCharEncodingHandlerPtr handler;
+    int len = -1;
+    int ret;
  
      if (ctxt == NULL) return(-1);
      switch (enc) {
@@ -978,9 +981,33 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
              (ctxt->input->cur[2] == 0xBF)) {
              ctxt->input->cur += 3;
          }
-       break ;
-       default:
-           break;
+        len = 90;
+       break;
+    case XML_CHAR_ENCODING_UCS2:
+        len = 90;
+       break;
+    case XML_CHAR_ENCODING_UCS4BE:
+    case XML_CHAR_ENCODING_UCS4LE:
+    case XML_CHAR_ENCODING_UCS4_2143:
+    case XML_CHAR_ENCODING_UCS4_3412:
+        len = 180;
+       break;
+    case XML_CHAR_ENCODING_EBCDIC:
+    case XML_CHAR_ENCODING_8859_1:
+    case XML_CHAR_ENCODING_8859_2:
+    case XML_CHAR_ENCODING_8859_3:
+    case XML_CHAR_ENCODING_8859_4:
+    case XML_CHAR_ENCODING_8859_5:
+    case XML_CHAR_ENCODING_8859_6:
+    case XML_CHAR_ENCODING_8859_7:
+    case XML_CHAR_ENCODING_8859_8:
+    case XML_CHAR_ENCODING_8859_9:
+    case XML_CHAR_ENCODING_ASCII:
+    case XML_CHAR_ENCODING_2022_JP:
+    case XML_CHAR_ENCODING_SHIFT_JIS:
+    case XML_CHAR_ENCODING_EUC_JP:
+        len = 45;
+       break;
      }
      handler = xmlGetCharEncodingHandler(enc);
      if (handler == NULL) {
@@ -1071,7 +1098,15 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
      if (handler == NULL)
         return(-1);
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
-    return(xmlSwitchToEncoding(ctxt, handler));
+    ret = xmlSwitchToEncodingInt(ctxt, handler, len);
+    if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
+        /*
+        * on encoding conversion errors, stop the parser
+        */
+        xmlStopParser(ctxt);
+       ctxt->errNo = XML_I18N_CONV_FAILED;
+    }
+    return(ret);
  }
  
  /**
@@ -1079,15 +1114,16 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
   * @ctxt:  the parser context
   * @input:  the input stream
   * @handler:  the encoding handler
+ * @len:  the number of bytes to convert for the first line or -1
   *
   * change the input functions when discovering the character encoding
   * of a given entity.
   *
   * Returns 0 in case of success, -1 otherwise
   */
-int
-xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
-                       xmlCharEncodingHandlerPtr handler)
+static int
+xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+                          xmlCharEncodingHandlerPtr handler, int len)
  {
      int nbchars;
  
@@ -1128,12 +1164,12 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
          /*
           * Is there already some content down the pipe to convert ?
           */
-        if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
+        if (xmlBufIsEmpty(input->buf->buffer) == 0) {
              int processed;
             unsigned int use;
  
              /*
-             * Specific handling of the Byte Order Mark for 
+             * Specific handling of the Byte Order Mark for
               * UTF-16
               */
              if ((handler->name != NULL) &&
@@ -1164,19 +1200,17 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
               * Move it as the raw buffer and create a new input buffer
               */
              processed = input->cur - input->base;
-            xmlBufferShrink(input->buf->buffer, processed);
+            xmlBufShrink(input->buf->buffer, processed);
              input->buf->raw = input->buf->buffer;
-            input->buf->buffer = xmlBufferCreate();
+            input->buf->buffer = xmlBufCreate();
             input->buf->rawconsumed = processed;
-           use = input->buf->raw->use;
+           use = xmlBufUse(input->buf->raw);
  
              if (ctxt->html) {
                  /*
                   * convert as much as possible of the buffer
                   */
-                nbchars = xmlCharEncInFunc(input->buf->encoder,
-                                           input->buf->buffer,
-                                           input->buf->raw);
+                nbchars = xmlCharEncInput(input->buf, 1);
              } else {
                  /*
                   * convert just enough to get
@@ -1184,9 +1218,7 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                   * parsed with the autodetected encoding
                   * into the parser reading buffer.
                   */
-                nbchars = xmlCharEncFirstLine(input->buf->encoder,
-                                              input->buf->buffer,
-                                              input->buf->raw);
+                nbchars = xmlCharEncFirstLineInput(input->buf, len);
              }
              if (nbchars < 0) {
                  xmlErrInternal(ctxt,
@@ -1194,10 +1226,8 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                                 NULL);
                  return (-1);
              }
-           input->buf->rawconsumed += use - input->buf->raw->use;
-            input->base = input->cur = input->buf->buffer->content;
-            input->end = &input->base[input->buf->buffer->use];
-
+           input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
+            xmlBufResetInput(input->buf->buffer, input);
          }
          return (0);
      } else if (input->length == 0) {
@@ -1212,8 +1242,9 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
  }
  
  /**
- * xmlSwitchToEncoding:
+ * xmlSwitchInputEncoding:
   * @ctxt:  the parser context
+ * @input:  the input stream
   * @handler:  the encoding handler
   *
   * change the input functions when discovering the character encoding
@@ -1222,13 +1253,32 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
   * Returns 0 in case of success, -1 otherwise
   */
  int
-xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 
-{
+xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+                          xmlCharEncodingHandlerPtr handler) {
+    return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
+}
+
+/**
+ * xmlSwitchToEncodingInt:
+ * @ctxt:  the parser context
+ * @handler:  the encoding handler
+ * @len: the length to convert or -1
+ *
+ * Change the input functions when discovering the character encoding
+ * of a given entity, and convert only @len bytes of the output. This
+ * is needed on auto-detection to allow any encoding declared later to
+ * convert the actual content after the xmlDecl.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+static int
+xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
+                       xmlCharEncodingHandlerPtr handler, int len) {
      int ret = 0;
  
      if (handler != NULL) {
          if (ctxt->input != NULL) {
-           ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
+           ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
         } else {
             xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
                            NULL);
@@ -1238,11 +1288,27 @@ xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
          * The parsing is now done in UTF8 natively
          */
         ctxt->charset = XML_CHAR_ENCODING_UTF8;
-    } else 
+    } else
         return(-1);
      return(ret);
  }
  
+/**
+ * xmlSwitchToEncoding:
+ * @ctxt:  the parser context
+ * @handler:  the encoding handler
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+int
+xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
+{
+    return (xmlSwitchToEncodingInt(ctxt, handler, -1));
+}
+
  /************************************************************************
   *                                                                     *
   *     Commodity functions to handle entities processing               *
@@ -1265,7 +1331,7 @@ xmlFreeInputStream(xmlParserInputPtr input) {
      if (input->version != NULL) xmlFree((char *) input->version);
      if ((input->free != NULL) && (input->base != NULL))
          input->free((xmlChar *) input->base);
-    if (input->buf != NULL) 
+    if (input->buf != NULL)
          xmlFreeParserInputBuffer(input->buf);
      xmlFree(input);
  }
@@ -1274,13 +1340,13 @@ xmlFreeInputStream(xmlParserInputPtr input) {
   * xmlNewInputStream:
   * @ctxt:  an XML parser context
   *
- * Create a new input stream structure
+ * Create a new input stream structure.
+ *
   * Returns the new input stream or NULL
   */
  xmlParserInputPtr
  xmlNewInputStream(xmlParserCtxtPtr ctxt) {
      xmlParserInputPtr input;
-    static int id = 0;
  
      input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
      if (input == NULL) {
@@ -1291,11 +1357,15 @@ xmlNewInputStream(xmlParserCtxtPtr ctxt) {
      input->line = 1;
      input->col = 1;
      input->standalone = -1;
+
      /*
-     * we don't care about thread reentrancy unicity for a single
-     * parser context (and hence thread) is sufficient.
+     * If the context is NULL the id cannot be initialized, but that
+     * should not happen while parsing which is the situation where
+     * the id is actually needed.
       */
-    input->id = id++;
+    if (ctxt != NULL)
+        input->id = ctxt->input_id++;
+
      return(input);
  }
  
@@ -1324,9 +1394,8 @@ xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
      }
      inputStream->filename = NULL;
      inputStream->buf = input;
-    inputStream->base = inputStream->buf->buffer->content;
-    inputStream->cur = inputStream->buf->buffer->content;
-    inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
+    xmlBufResetInput(inputStream->buf->buffer, inputStream);
+
      if (enc != XML_CHAR_ENCODING_NONE) {
          xmlSwitchEncoding(ctxt, enc);
      }
@@ -1387,8 +1456,11 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
      if (input == NULL) {
         return(NULL);
      }
-    input->filename = (char *) entity->URI;
+    if (entity->URI != NULL)
+       input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
      input->base = entity->content;
+    if (entity->length == 0)
+        entity->length = xmlStrlen(entity->content);
      input->cur = entity->content;
      input->length = entity->length;
      input->end = &entity->content[input->length];
@@ -1467,7 +1539,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
      inputStream = xmlCheckHTTPInput(ctxt, inputStream);
      if (inputStream == NULL)
          return(NULL);
-    
+
      if (inputStream->filename == NULL)
         URI = xmlStrdup((xmlChar *) filename);
      else
@@ -1478,9 +1550,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
      if (URI != NULL) xmlFree((char *) URI);
      inputStream->directory = directory;
  
-    inputStream->base = inputStream->buf->buffer->content;
-    inputStream->cur = inputStream->buf->buffer->content;
-    inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
+    xmlBufResetInput(inputStream->buf->buffer, inputStream);
      if ((ctxt->directory == NULL) && (directory != NULL))
          ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
      return(inputStream);
@@ -1519,6 +1589,8 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
          xmlErrMemory(NULL, "cannot initialize parser context\n");
         return(-1);
      }
+    xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
+
      if (ctxt->sax == NULL)
         ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
      if (ctxt->sax == NULL) {
@@ -1630,12 +1702,20 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
      ctxt->nsWellFormed = 1;
      ctxt->valid = 1;
      ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
+    if (ctxt->loadsubset) {
+        ctxt->options |= XML_PARSE_DTDLOAD;
+    }
      ctxt->validate = xmlDoValidityCheckingDefaultValue;
      ctxt->pedantic = xmlPedanticParserDefaultValue;
+    if (ctxt->pedantic) {
+        ctxt->options |= XML_PARSE_PEDANTIC;
+    }
      ctxt->linenumbers = xmlLineNumbersDefaultValue;
      ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
-    if (ctxt->keepBlanks == 0)
+    if (ctxt->keepBlanks == 0) {
         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
+       ctxt->options |= XML_PARSE_NOBLANKS;
+    }
  
      ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
      ctxt->vctxt.userData = ctxt;
@@ -1647,8 +1727,12 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
         else
             ctxt->vctxt.warning = xmlParserValidityWarning;
         ctxt->vctxt.nodeMax = 0;
+        ctxt->options |= XML_PARSE_DTDVALID;
      }
      ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
+    if (ctxt->replaceEntities) {
+        ctxt->options |= XML_PARSE_NOENT;
+    }
      ctxt->record_info = 0;
      ctxt->nbChars = 0;
      ctxt->checkIndex = 0;
@@ -1657,6 +1741,10 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
      ctxt->depth = 0;
      ctxt->charset = XML_CHAR_ENCODING_UTF8;
      ctxt->catalogs = NULL;
+    ctxt->nbentities = 0;
+    ctxt->sizeentities = 0;
+    ctxt->sizeentcopy = 0;
+    ctxt->input_id = 1;
      xmlInitNodeInfoSeq(&ctxt->node_seq);
      return(0);
  }
@@ -1682,6 +1770,7 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
      if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
      if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
      if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
+    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
      if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
      if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
      if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
@@ -1701,7 +1790,7 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
      if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
      if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
      if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
-    if (ctxt->attsDefault != NULL) 
+    if (ctxt->attsDefault != NULL)
          xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
      if (ctxt->attsSpecial != NULL)
          xmlHashFree(ctxt->attsSpecial, NULL);
@@ -1801,7 +1890,7 @@ xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
   * @node:  an XML node within the tree
   *
   * Find the parser node info struct for a given node
- * 
+ *
   * Returns an xmlParserNodeInfo block pointer or NULL
   */
  const xmlParserNodeInfo *
@@ -1859,7 +1948,7 @@ xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
   * @seq:  a node info sequence pointer
   * @node:  an XML node pointer
   *
- * 
+ *
   * xmlParserFindNodeInfoIndex : Find the index that the info record for
   *   the given node is or should be at in a sorted sequence
   *
@@ -1916,7 +2005,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
      pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
                                       info->node);
  
-    if ((pos < ctxt->node_seq.length) && 
+    if ((pos < ctxt->node_seq.length) &&
          (ctxt->node_seq.buffer != NULL) &&
          (ctxt->node_seq.buffer[pos].node == info->node)) {
          ctxt->node_seq.buffer[pos] = *info;
@@ -1924,7 +2013,8 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
  
      /* Otherwise, we need to add new node to buffer */
      else {
-        if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
+        if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
+           (ctxt->node_seq.buffer == NULL)) {
              xmlParserNodeInfo *tmp_buffer;
              unsigned int byte_size;
  
@@ -1969,7 +2059,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
   ************************************************************************/
  /**
   * xmlPedanticParserDefault:
- * @val:  int 0 or 1 
+ * @val:  int 0 or 1
   *
   * Set and return the previous value for enabling pedantic warnings.
   *
@@ -1986,7 +2076,7 @@ xmlPedanticParserDefault(int val) {
  
  /**
   * xmlLineNumbersDefault:
- * @val:  int 0 or 1 
+ * @val:  int 0 or 1
   *
   * Set and return the previous value for enabling line numbers in elements
   * contents. This may break on old application and is turned off by default.
@@ -2004,7 +2094,7 @@ xmlLineNumbersDefault(int val) {
  
  /**
   * xmlSubstituteEntitiesDefault:
- * @val:  int 0 or 1 
+ * @val:  int 0 or 1
   *
   * Set and return the previous value for default entity support.
   * Initially the parser always keep entity references instead of substituting
@@ -2026,7 +2116,7 @@ xmlSubstituteEntitiesDefault(int val) {
  
  /**
   * xmlKeepBlanksDefault:
- * @val:  int 0 or 1 
+ * @val:  int 0 or 1
   *
   * Set and return the previous value for default blanks text nodes support.
   * The 1.x version of the parser used an heuristic to try to detect
@@ -2037,7 +2127,7 @@ xmlSubstituteEntitiesDefault(int val) {
   * ignorableWhitespace() are only generated when running the parser in
   * validating mode and when the current element doesn't allow CDATA or
   * mixed content.
- * This function is provided as a way to force the standard behavior 
+ * This function is provided as a way to force the standard behavior
   * on 1.X libs and to switch back to the old mode for compatibility when
   * running 1.X client code on 2.X . Upgrade of 1.X code should be done
   * by using xmlIsBlankNode() commodity function to detect the "empty"
@@ -2053,7 +2143,7 @@ xmlKeepBlanksDefault(int val) {
      int old = xmlKeepBlanksDefaultValue;
  
      xmlKeepBlanksDefaultValue = val;
-    xmlIndentTreeOutput = !val;
+    if (!val) xmlIndentTreeOutput = 1;
      return(old);
  }