Imported Upstream version 2.9.6_rc1
[platform/upstream/libxml2.git] / encoding.c
index d486dd6..cd019c5 100644 (file)
@@ -24,6 +24,7 @@
 #include "libxml.h"
 
 #include <string.h>
+#include <limits.h>
 
 #ifdef HAVE_CTYPE_H
 #include <ctype.h>
@@ -44,6 +45,9 @@
 #include <libxml/globals.h>
 #include <libxml/xmlerror.h>
 
+#include "buf.h"
+#include "enc.h"
+
 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
 
@@ -89,7 +93,7 @@ xmlEncodingErrMemory(const char *extra)
  *
  * n encoding error
  */
-static void
+static void LIBXML_ATTR_FORMAT(2,0)
 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
 {
     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
@@ -350,8 +354,14 @@ UTF8ToUTF8(unsigned char* out, int *outlen,
 {
     int len;
 
-    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
+    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
+    if (inb == NULL) {
+        /* inb == NULL means output is initialized. */
+        *outlen = 0;
+        *inlenb = 0;
+        return(0);
+    }
     if (*outlen > *inlenb) {
        len = *inlenb;
     } else {
@@ -1513,6 +1523,8 @@ xmlGetCharEncodingHandler(xmlCharEncoding enc) {
             if (handler != NULL) return(handler);
             handler = xmlFindCharEncodingHandler("EBCDIC-US");
             if (handler != NULL) return(handler);
+            handler = xmlFindCharEncodingHandler("IBM-037");
+            if (handler != NULL) return(handler);
            break;
         case XML_CHAR_ENCODING_UCS4BE:
             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
@@ -1825,7 +1837,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
 
 /************************************************************************
  *                                                                     *
- *             ICU based generic conversion functions                  *
+ *             ICU based generic conversion functions          *
  *                                                                     *
  ************************************************************************/
 
@@ -1897,9 +1909,61 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
  *             The real API used by libxml for on-the-fly conversion   *
  *                                                                     *
  ************************************************************************/
-int
-xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
-                       xmlBufferPtr in, int len);
+
+/* Decodes one chunk with the first input converter @handler provides. */
+/* Returns -2, with both lengths zeroed, if it has no input function. */
+static int
+xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
+                 int *outlen, const unsigned char *in, int *inlen) {
+    /* The handler's own input callback is tried before iconv and ICU. */
+    if (handler->input != NULL)
+        return(handler->input(out, outlen, in, inlen));
+
+#ifdef LIBXML_ICONV_ENABLED
+    if (handler->iconv_in != NULL)
+        return(xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen));
+#endif /* LIBXML_ICONV_ENABLED */
+
+#ifdef LIBXML_ICU_ENABLED
+    if (handler->uconv_in != NULL)
+        return(xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in,
+                               inlen));
+#endif /* LIBXML_ICU_ENABLED */
+
+    /* No converter at all: report that nothing was read or written. */
+    *outlen = 0;
+    *inlen = 0;
+
+    return(-2);
+}
+
+/*
+ * Encodes one chunk with the first output converter @handler provides.
+ * Returns -4 if no output function was found.
+ */
+static int
+xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
+                  int *outlen, const unsigned char *in, int *inlen) {
+    /* The handler's own output callback is tried before iconv and ICU. */
+    if (handler->output != NULL)
+        return(handler->output(out, outlen, in, inlen));
+
+#ifdef LIBXML_ICONV_ENABLED
+    if (handler->iconv_out != NULL)
+        return(xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen));
+#endif /* LIBXML_ICONV_ENABLED */
+
+#ifdef LIBXML_ICU_ENABLED
+    if (handler->uconv_out != NULL)
+        return(xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in,
+                               inlen));
+#endif /* LIBXML_ICU_ENABLED */
+
+    /* No converter at all: report that nothing was read or written. */
+    *outlen = 0;
+    *inlen = 0;
+    return(-4);
+}
 
 /**
  * xmlCharEncFirstLineInt:
@@ -1919,7 +1983,7 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
 int
 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                        xmlBufferPtr in, int len) {
-    int ret = -2;
+    int ret;
     int written;
     int toconv;
 
@@ -1946,37 +2010,17 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
             toconv = 180;
     }
     if (toconv * 2 >= written) {
-        xmlBufferGrow(out, toconv);
+        xmlBufferGrow(out, toconv * 2);
        written = out->size - out->use - 1;
     }
 
-    if (handler->input != NULL) {
-       ret = handler->input(&out->content[out->use], &written,
-                            in->content, &toconv);
-       xmlBufferShrink(in, toconv);
-       out->use += written;
-       out->content[out->use] = 0;
-    }
-#ifdef LIBXML_ICONV_ENABLED
-    else if (handler->iconv_in != NULL) {
-       ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
-                             &written, in->content, &toconv);
-       xmlBufferShrink(in, toconv);
-       out->use += written;
-       out->content[out->use] = 0;
-       if (ret == -1) ret = -3;
-    }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
-    else if (handler->uconv_in != NULL) {
-       ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
-                             &written, in->content, &toconv);
-       xmlBufferShrink(in, toconv);
-       out->use += written;
-       out->content[out->use] = 0;
-       if (ret == -1) ret = -3;
-    }
-#endif /* LIBXML_ICU_ENABLED */
+    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
+                           in->content, &toconv);
+    xmlBufferShrink(in, toconv);
+    out->use += written;
+    out->content[out->use] = 0;
+    if (ret == -1) ret = -3;
+
 #ifdef DEBUG_ENCODING
     switch (ret) {
         case 0:
@@ -2029,6 +2073,214 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
 }
 
 /**
+ * xmlCharEncFirstLineInput:
+ * @input: a parser input buffer
+ * @len:  number of bytes to convert for the first line, or -1
+ *
+ * Front-end for the encoding handler input function, but handle only
+ * the very first line. Point is that this is based on autodetection
+ * of the encoding and once that first line is converted we may find
+ * out that a different decoder is needed to process the input.
+ *
+ * Returns the number of byte written if success, or
+ *     -1 general error
+ *     -2 if the transcoding fails (for *in is not valid utf8 string or
+ *        the result of transformation can't fit into the encoding we want), or
+ */
+int
+xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
+{
+    int ret;
+    size_t written;
+    size_t toconv;
+    int c_in;
+    int c_out;
+    xmlBufPtr in;
+    xmlBufPtr out;
+
+    if ((input == NULL) || (input->encoder == NULL) ||
+        (input->buffer == NULL) || (input->raw == NULL))
+        return (-1);
+    out = input->buffer;
+    in = input->raw;
+
+    toconv = xmlBufUse(in);
+    if (toconv == 0)
+        return (0);
+    written = xmlBufAvail(out) - 1; /* count '\0' */
+    /*
+     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
+     * 45 chars should be sufficient to reach the end of the encoding
+     * declaration without going too far inside the document content.
+     * on UTF-16 this means 90bytes, on UCS4 this means 180
+     * The actual value depending on guessed encoding is passed as @len
+     * if provided
+     */
+    if (len >= 0) {
+        if (toconv > (unsigned int) len)
+            toconv = len;
+    } else {
+        if (toconv > 180)
+            toconv = 180;
+    }
+    if (toconv * 2 >= written) {
+        xmlBufGrow(out, toconv * 2);
+        written = xmlBufAvail(out) - 1;
+    }
+    if (written > 360)
+        written = 360;
+
+    c_in = toconv;
+    c_out = written;
+    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
+                           xmlBufContent(in), &c_in);
+    xmlBufShrink(in, c_in);
+    xmlBufAddLen(out, c_out);
+    if (ret == -1)
+        ret = -3;
+
+    switch (ret) {
+        case 0:
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                            "converted %d bytes to %d bytes of input\n",
+                            c_in, c_out);
+#endif
+            break;
+        case -1:
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                         "converted %d bytes to %d bytes of input, %d left\n",
+                            c_in, c_out, (int)xmlBufUse(in));
+#endif
+            break;
+        case -3:
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                        "converted %d bytes to %d bytes of input, %d left\n",
+                            c_in, c_out, (int)xmlBufUse(in));
+#endif
+            break;
+        case -2: {
+            char buf[50];
+            const xmlChar *content = xmlBufContent(in);
+
+           snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
+                    content[0], content[1],
+                    content[2], content[3]);
+           buf[49] = 0;
+           xmlEncodingErr(XML_I18N_CONV_FAILED,
+                   "input conversion failed due to input error, bytes %s\n",
+                          buf);
+        }
+    }
+    /*
+     * Ignore when input buffer is not on a boundary
+     */
+    if (ret == -3) ret = 0;
+    if (ret == -1) ret = 0;
+    return(ret);
+}
+
+/**
+ * xmlCharEncInput:
+ * @input: a parser input buffer
+ * @flush: if non-zero, try to convert the whole raw buffer in one call
+ *
+ * Generic front-end for the encoding handler on parser input.
+ *
+ * Returns the number of bytes written if any were produced, else 0 when
+ *     there was nothing to convert or the conversion stopped early, or
+ *     -1 if @input, its encoder or one of its buffers is missing
+ *     -2 if the input is invalid or the encoder has no input function
+ */
+int
+xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
+{
+    int ret;
+    size_t written;
+    size_t toconv;
+    int c_in;
+    int c_out;
+    xmlBufPtr in;
+    xmlBufPtr out;
+
+    if ((input == NULL) || (input->encoder == NULL) ||
+        (input->buffer == NULL) || (input->raw == NULL))
+        return (-1);
+    out = input->buffer;
+    in = input->raw;
+
+    toconv = xmlBufUse(in);
+    if (toconv == 0)
+        return (0);
+    /* Unless flushing, convert at most 64KB of raw input per call. */
+    if ((toconv > 64 * 1024) && (flush == 0))
+        toconv = 64 * 1024;
+    written = xmlBufAvail(out);
+    if (written > 0)
+        written--; /* count '\0' */
+    if (toconv * 2 >= written) {
+        xmlBufGrow(out, toconv * 2);
+        written = xmlBufAvail(out);
+        if (written > 0)
+            written--; /* count '\0' */
+    }
+    /* Likewise cap the output window at 128KB unless flushing. */
+    if ((written > 128 * 1024) && (flush == 0))
+        written = 128 * 1024;
+
+    c_in = toconv;
+    c_out = written;
+    ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
+                           xmlBufContent(in), &c_in);
+    xmlBufShrink(in, c_in);
+    xmlBufAddLen(out, c_out);
+    /* Fold -1 into -3 so one code covers every incomplete conversion. */
+    if (ret == -1)
+        ret = -3;
+
+    switch (ret) {
+        case 0:
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                            "converted %d bytes to %d bytes of input\n",
+                            c_in, c_out);
+#endif
+            break;
+        case -3:
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                        "converted %d bytes to %d bytes of input, %d left\n",
+                            c_in, c_out, (int)xmlBufUse(in));
+#endif
+            break;
+        case -2: {
+            char buf[50];
+            const xmlChar *content = xmlBufContent(in);
+
+            snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
+                     content[0], content[1],
+                     content[2], content[3]);
+            buf[49] = 0;
+            xmlEncodingErr(XML_I18N_CONV_FAILED,
+                    "input conversion failed due to input error, bytes %s\n",
+                           buf);
+            break;
+        }
+        default:
+            break;
+    }
+    /*
+     * Ignore when input buffer is not on a boundary
+     */
+    if (ret == -3)
+        ret = 0;
+    /* Report the bytes produced; fall back to the status if there are none. */
+    return (c_out? c_out : ret);
+}
+
+/**
  * xmlCharEncInFunc:
  * @handler:   char encoding transformation data structure
  * @out:  an xmlBuffer for the output.
@@ -2045,7 +2297,7 @@ int
 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
                  xmlBufferPtr in)
 {
-    int ret = -2;
+    int ret;
     int written;
     int toconv;
 
@@ -2064,35 +2316,14 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
         xmlBufferGrow(out, out->size + toconv * 2);
         written = out->size - out->use - 1;
     }
-    if (handler->input != NULL) {
-        ret = handler->input(&out->content[out->use], &written,
-                             in->content, &toconv);
-        xmlBufferShrink(in, toconv);
-        out->use += written;
-        out->content[out->use] = 0;
-    }
-#ifdef LIBXML_ICONV_ENABLED
-    else if (handler->iconv_in != NULL) {
-        ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
-                              &written, in->content, &toconv);
-        xmlBufferShrink(in, toconv);
-        out->use += written;
-        out->content[out->use] = 0;
-        if (ret == -1)
-            ret = -3;
-    }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
-    else if (handler->uconv_in != NULL) {
-        ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
-                              &written, in->content, &toconv);
-        xmlBufferShrink(in, toconv);
-        out->use += written;
-        out->content[out->use] = 0;
-        if (ret == -1)
-            ret = -3;
-    }
-#endif /* LIBXML_ICU_ENABLED */
+    ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
+                           in->content, &toconv);
+    xmlBufferShrink(in, toconv);
+    out->use += written;
+    out->content[out->use] = 0;
+    if (ret == -1)
+        ret = -3;
+
     switch (ret) {
         case 0:
 #ifdef DEBUG_ENCODING
@@ -2135,6 +2366,181 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
     return (written? written : ret);
 }
 
+#ifdef LIBXML_OUTPUT_ENABLED
+/**
+ * xmlCharEncOutput:
+ * @output: a parser output buffer
+ * @init: is this an initialization call without data
+ *
+ * Generic front-end for the encoding handler on parser output.
+ * A first call with @init == 1 has to be made to initiate the output
+ * in case of a non-stateless encoding needing to initiate its state
+ * or the output (like the BOM in UTF16).
+ * In case of UTF8 sequence conversion errors for the given encoder,
+ * the content will be automatically remapped to a CharRef sequence.
+ *
+ * Returns the status of the last conversion step (0 when done), or
+ *     -1 general error, including an encoder without output function
+ *     -2 if the transcoding fails on input no CharRef can replace
+ *     -3 if the encoder stopped early (its -1 with no output becomes -3)
+ */
+int
+xmlCharEncOutput(xmlOutputBufferPtr output, int init)
+{
+    int ret;
+    size_t written;
+    size_t writtentot = 0;
+    size_t toconv;
+    int c_in;
+    int c_out;
+    xmlBufPtr in;
+    xmlBufPtr out;
+
+    if ((output == NULL) || (output->encoder == NULL) ||
+        (output->buffer == NULL) || (output->conv == NULL))
+        return (-1);
+    out = output->conv;
+    in = output->buffer;
+
+retry:
+
+    written = xmlBufAvail(out);
+    if (written > 0)
+        written--; /* count '\0' */
+
+    /*
+     * First specific handling of the initialization call
+     */
+    if (init) {
+        c_in = 0;
+        c_out = written;
+        /* TODO: Check return value. */
+        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
+                          NULL, &c_in);
+        xmlBufAddLen(out, c_out);
+#ifdef DEBUG_ENCODING
+        xmlGenericError(xmlGenericErrorContext,
+                "initialized encoder\n");
+#endif
+        return(0);
+    }
+
+    /*
+     * Conversion itself.
+     */
+    toconv = xmlBufUse(in);
+    if (toconv == 0)
+        return (0);
+    if (toconv > 64 * 1024)
+        toconv = 64 * 1024;
+    if (toconv * 4 >= written) {
+        xmlBufGrow(out, toconv * 4);
+        written = xmlBufAvail(out);
+        if (written > 0)
+            written--; /* count '\0' */
+    }
+    if (written > 256 * 1024)
+        written = 256 * 1024;
+
+    c_in = toconv;
+    c_out = written;
+    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
+                            xmlBufContent(in), &c_in);
+    xmlBufShrink(in, c_in);
+    xmlBufAddLen(out, c_out);
+    writtentot += c_out;
+    if (ret == -1) {
+        if (c_out > 0) {
+            /* Can be a limitation of iconv or uconv */
+            goto retry;
+        }
+        ret = -3;
+    }
+
+    /*
+     * Attempt to handle error cases
+     */
+    switch (ret) {
+        case 0:
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                    "converted %d bytes to %d bytes of output\n",
+                    c_in, c_out);
+#endif
+            break;
+        case -3:
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
+                    c_in, c_out, (int) xmlBufUse(in));
+#endif
+            break;
+        case -4:
+            xmlEncodingErr(XML_I18N_NO_OUTPUT,
+                           "xmlCharEncOutFunc: no output function !\n", NULL);
+            ret = -1;
+            break;
+        case -2: {
+            xmlChar charref[20];
+            int len = (int) xmlBufUse(in);
+            xmlChar *content = xmlBufContent(in);
+            int cur, charrefLen;
+
+            cur = xmlGetUTF8Char(content, &len);
+            if (cur <= 0)
+                break;
+
+#ifdef DEBUG_ENCODING
+            xmlGenericError(xmlGenericErrorContext,
+                    "handling output conversion error\n");
+            xmlGenericError(xmlGenericErrorContext,
+                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+                    content[0], content[1],
+                    content[2], content[3]);
+#endif
+            /*
+             * Removes the UTF8 sequence, and replace it by a charref
+             * and continue the transcoding phase, hoping the error
+             * did not mangle the encoder state.
+             */
+            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
+                             "&#%d;", cur);
+            xmlBufShrink(in, len);
+            xmlBufGrow(out, charrefLen * 4);
+            written = xmlBufAvail(out);
+            if (written > 0)
+                written--; /* count '\0' */
+            c_out = written;
+            c_in = charrefLen;
+            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
+                                    charref, &c_in);
+
+            if ((ret < 0) || (c_in != charrefLen)) {
+                char buf[50];
+
+                snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
+                         content[0], content[1],
+                         content[2], content[3]);
+                buf[49] = 0;
+                xmlEncodingErr(XML_I18N_CONV_FAILED,
+                    "output conversion failed due to conv error, bytes %s\n",
+                               buf);
+                if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
+                    content[0] = ' ';
+                break;
+            }
+
+            xmlBufAddLen(out, c_out);
+            writtentot += c_out;
+            goto retry;
+        }
+        default:
+            break;
+    }
+    /* NOTE(review): writtentot is accumulated but only the status is returned. */
+    return(ret);
+}
+#endif
+
 /**
  * xmlCharEncOutFunc:
  * @handler:   char enconding transformation data structure
@@ -2156,12 +2562,11 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
 int
 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                   xmlBufferPtr in) {
-    int ret = -2;
+    int ret;
     int written;
     int writtentot = 0;
     int toconv;
     int output = 0;
-    int charref_len = 0;
 
     if (handler == NULL) return(-1);
     if (out == NULL) return(-1);
@@ -2178,31 +2583,11 @@ retry:
      */
     if (in == NULL) {
         toconv = 0;
-       if (handler->output != NULL) {
-           ret = handler->output(&out->content[out->use], &written,
-                                 NULL, &toconv);
-           if (ret >= 0) { /* Gennady: check return value */
-               out->use += written;
-               out->content[out->use] = 0;
-           }
-       }
-#ifdef LIBXML_ICONV_ENABLED
-       else if (handler->iconv_out != NULL) {
-           ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
-                                 &written, NULL, &toconv);
-           out->use += written;
-           out->content[out->use] = 0;
-       }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
-       else if (handler->uconv_out != NULL) {
-           ret = xmlUconvWrapper(handler->uconv_out, 0,
-                              &out->content[out->use],
-                                             &written, NULL, &toconv);
-           out->use += written;
-           out->content[out->use] = 0;
-       }
-#endif /* LIBXML_ICU_ENABLED */
+        /* TODO: Check return value. */
+        xmlEncOutputChunk(handler, &out->content[out->use], &written,
+                          NULL, &toconv);
+        out->use += written;
+        out->content[out->use] = 0;
 #ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "initialized encoder\n");
@@ -2220,61 +2605,18 @@ retry:
         xmlBufferGrow(out, toconv * 4);
        written = out->size - out->use - 1;
     }
-    if (handler->output != NULL) {
-       ret = handler->output(&out->content[out->use], &written,
-                             in->content, &toconv);
-       if (written > 0) {
-           xmlBufferShrink(in, toconv);
-           out->use += written;
-           writtentot += written;
-       }
-       out->content[out->use] = 0;
-    }
-#ifdef LIBXML_ICONV_ENABLED
-    else if (handler->iconv_out != NULL) {
-       ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
-                             &written, in->content, &toconv);
-       xmlBufferShrink(in, toconv);
-       out->use += written;
-       writtentot += written;
-       out->content[out->use] = 0;
-       if (ret == -1) {
-           if (written > 0) {
-               /*
-                * Can be a limitation of iconv
-                */
-                charref_len = 0;
-               goto retry;
-           }
-           ret = -3;
-       }
-    }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
-    else if (handler->uconv_out != NULL) {
-       ret = xmlUconvWrapper(handler->uconv_out, 0,
-                              &out->content[out->use],
-                             &written, in->content, &toconv);
-       xmlBufferShrink(in, toconv);
-       out->use += written;
-       writtentot += written;
-       out->content[out->use] = 0;
-       if (ret == -1) {
-           if (written > 0) {
-               /*
-                * Can be a limitation of iconv
-                */
-                charref_len = 0;
-               goto retry;
-           }
-           ret = -3;
-       }
-    }
-#endif /* LIBXML_ICU_ENABLED */
-    else {
-       xmlEncodingErr(XML_I18N_NO_OUTPUT,
-                      "xmlCharEncOutFunc: no output function !\n", NULL);
-       return(-1);
+    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
+                            in->content, &toconv);
+    xmlBufferShrink(in, toconv);
+    out->use += written;
+    writtentot += written;
+    out->content[out->use] = 0;
+    if (ret == -1) {
+        if (written > 0) {
+            /* Can be a limitation of iconv or uconv */
+            goto retry;
+        }
+        ret = -3;
     }
 
     if (ret >= 0) output += ret;
@@ -2302,47 +2644,44 @@ retry:
                    toconv, written, in->use);
 #endif
            break;
+        case -4:
+           xmlEncodingErr(XML_I18N_NO_OUTPUT,
+                          "xmlCharEncOutFunc: no output function !\n", NULL);
+           ret = -1;
+            break;
         case -2: {
+           xmlChar charref[20];
            int len = in->use;
            const xmlChar *utf = (const xmlChar *) in->content;
-           int cur;
+           int cur, charrefLen;
 
            cur = xmlGetUTF8Char(utf, &len);
-           if ((charref_len != 0) && (written < charref_len)) {
-               /*
-                * We attempted to insert a character reference and failed.
-                * Undo what was written and skip the remaining charref.
-                */
-               out->use -= written;
-               writtentot -= written;
-               xmlBufferShrink(in, charref_len - written);
-               charref_len = 0;
-
-               ret = -1;
+           if (cur <= 0)
                 break;
-           } else if (cur > 0) {
-               xmlChar charref[20];
 
 #ifdef DEBUG_ENCODING
-               xmlGenericError(xmlGenericErrorContext,
-                       "handling output conversion error\n");
-               xmlGenericError(xmlGenericErrorContext,
-                       "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
-                       in->content[0], in->content[1],
-                       in->content[2], in->content[3]);
+            xmlGenericError(xmlGenericErrorContext,
+                    "handling output conversion error\n");
+            xmlGenericError(xmlGenericErrorContext,
+                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+                    in->content[0], in->content[1],
+                    in->content[2], in->content[3]);
 #endif
-               /*
-                * Removes the UTF8 sequence, and replace it by a charref
-                * and continue the transcoding phase, hoping the error
-                * did not mangle the encoder state.
-                */
-               charref_len = snprintf((char *) &charref[0], sizeof(charref),
-                                "&#%d;", cur);
-               xmlBufferShrink(in, len);
-               xmlBufferAddHead(in, charref, -1);
-
-               goto retry;
-           } else {
+            /*
+             * Removes the UTF8 sequence, and replace it by a charref
+             * and continue the transcoding phase, hoping the error
+             * did not mangle the encoder state.
+             */
+            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
+                             "&#%d;", cur);
+            xmlBufferShrink(in, len);
+            xmlBufferGrow(out, charrefLen * 4);
+           written = out->size - out->use - 1;
+            toconv = charrefLen;
+            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
+                                    charref, &toconv);
+
+           if ((ret < 0) || (toconv != charrefLen)) {
                char buf[50];
 
                snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
@@ -2354,8 +2693,13 @@ retry:
                               buf);
                if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
                    in->content[0] = ' ';
+               break;
            }
-           break;
+
+            out->use += written;
+            writtentot += written;
+            out->content[out->use] = 0;
+            goto retry;
        }
     }
     return(ret);
@@ -2373,14 +2717,25 @@ int
 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
     int ret = 0;
     int tofree = 0;
+    int i, handler_in_list = 0;
+
     if (handler == NULL) return(-1);
     if (handler->name == NULL) return(-1);
+    if (handlers != NULL) {
+        for (i = 0;i < nbCharEncodingHandler; i++) {
+            if (handler == handlers[i]) {
+               handler_in_list = 1;
+               break;
+           }
+       }
+    }
 #ifdef LIBXML_ICONV_ENABLED
     /*
      * Iconv handlers can be used only once, free the whole block.
      * and the associated icon resources.
      */
-    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
+    if ((handler_in_list == 0) &&
+        ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
         tofree = 1;
        if (handler->iconv_out != NULL) {
            if (iconv_close(handler->iconv_out))
@@ -2395,7 +2750,8 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
     }
 #endif /* LIBXML_ICONV_ENABLED */
 #ifdef LIBXML_ICU_ENABLED
-    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
+    if ((handler_in_list == 0) &&
+        ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
         tofree = 1;
        if (handler->uconv_out != NULL) {
            closeIcuConverter(handler->uconv_out);
@@ -2462,54 +2818,20 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
 
            int ret;
 
-           if (handler->output != NULL) {
-               do {
-                   toconv = in->end - cur;
-                   written = 32000;
-                   ret = handler->output(&convbuf[0], &written,
-                                     cur, &toconv);
-                   if (ret == -1) return(-1);
-                   unused += written;
-                   cur += toconv;
-               } while (ret == -2);
-#ifdef LIBXML_ICONV_ENABLED
-           } else if (handler->iconv_out != NULL) {
-               do {
-                   toconv = in->end - cur;
-                   written = 32000;
-                   ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
-                             &written, cur, &toconv);
-                   if (ret < 0) {
-                       if (written > 0)
-                           ret = -2;
-                       else
-                           return(-1);
-                   }
-                   unused += written;
-                   cur += toconv;
-               } while (ret == -2);
-#endif
-#ifdef LIBXML_ICU_ENABLED
-           } else if (handler->uconv_out != NULL) {
-               do {
-                   toconv = in->end - cur;
-                   written = 32000;
-                   ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
-                             &written, cur, &toconv);
-                   if (ret < 0) {
-                       if (written > 0)
-                           ret = -2;
-                       else
-                           return(-1);
-                   }
-                   unused += written;
-                   cur += toconv;
-               } while (ret == -2);
-#endif
-            } else {
-               /* could not find a converter */
-               return(-1);
-           }
+            do {
+                toconv = in->end - cur;
+                written = 32000;
+                ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
+                                        cur, &toconv);
+                if (ret < 0) {
+                    if (written > 0)
+                        ret = -2;
+                    else
+                        return(-1);
+                }
+                unused += written;
+                cur += toconv;
+            } while (ret == -2);
        }
        if (in->buf->rawconsumed < unused)
            return(-1);
@@ -2619,7 +2941,7 @@ UTF8ToISO8859x(unsigned char* out, int *outlen,
             c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
-                       xlattable [32 + d] * 64] * 64];
+                       xlattable [32 + d] * 64] * 64];
             if (d == 0) {
                 /* not in character set */
                 *outlen = out - outstart;