#include "libxml.h"
#include <string.h>
+#include <limits.h>
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
+#include "buf.h"
+#include "enc.h"
+
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
*
* n encoding error
*/
-static void
+static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
{
__xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
{
int len;
- if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
+ if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
return(-1);
+ if (inb == NULL) {
+ /* inb == NULL means output is initialized. */
+ *outlen = 0;
+ *inlenb = 0;
+ return(0);
+ }
if (*outlen > *inlenb) {
len = *inlenb;
} else {
if (handler != NULL) return(handler);
handler = xmlFindCharEncodingHandler("EBCDIC-US");
if (handler != NULL) return(handler);
+ handler = xmlFindCharEncodingHandler("IBM-037");
+ if (handler != NULL) return(handler);
break;
case XML_CHAR_ENCODING_UCS4BE:
handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
/************************************************************************
* *
- * ICU based generic conversion functions *
+ * ICU based generic conversion functions *
* *
************************************************************************/
* The real API used by libxml for on-the-fly conversion *
* *
************************************************************************/
-int
-xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
- xmlBufferPtr in, int len);
+
+static int
+xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
+ int *outlen, const unsigned char *in, int *inlen) {
+ int ret;
+
+ if (handler->input != NULL) {
+ ret = handler->input(out, outlen, in, inlen);
+ }
+#ifdef LIBXML_ICONV_ENABLED
+ else if (handler->iconv_in != NULL) {
+ ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
+ }
+#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ else if (handler->uconv_in != NULL) {
+ ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen);
+ }
+#endif /* LIBXML_ICU_ENABLED */
+ else {
+ *outlen = 0;
+ *inlen = 0;
+ ret = -2;
+ }
+
+ return(ret);
+}
+
+/* Returns -4 if no output function was found. */
+static int
+xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
+ int *outlen, const unsigned char *in, int *inlen) {
+ int ret;
+
+ if (handler->output != NULL) {
+ ret = handler->output(out, outlen, in, inlen);
+ }
+#ifdef LIBXML_ICONV_ENABLED
+ else if (handler->iconv_out != NULL) {
+ ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
+ }
+#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ else if (handler->uconv_out != NULL) {
+ ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen);
+ }
+#endif /* LIBXML_ICU_ENABLED */
+ else {
+ *outlen = 0;
+ *inlen = 0;
+ ret = -4;
+ }
+
+ return(ret);
+}
/**
* xmlCharEncFirstLineInt:
int
xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
xmlBufferPtr in, int len) {
- int ret = -2;
+ int ret;
int written;
int toconv;
toconv = 180;
}
if (toconv * 2 >= written) {
- xmlBufferGrow(out, toconv);
+ xmlBufferGrow(out, toconv * 2);
written = out->size - out->use - 1;
}
- if (handler->input != NULL) {
- ret = handler->input(&out->content[out->use], &written,
- in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- out->content[out->use] = 0;
- }
-#ifdef LIBXML_ICONV_ENABLED
- else if (handler->iconv_in != NULL) {
- ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
- &written, in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- out->content[out->use] = 0;
- if (ret == -1) ret = -3;
- }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
- else if (handler->uconv_in != NULL) {
- ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
- &written, in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- out->content[out->use] = 0;
- if (ret == -1) ret = -3;
- }
-#endif /* LIBXML_ICU_ENABLED */
+ ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
+ in->content, &toconv);
+ xmlBufferShrink(in, toconv);
+ out->use += written;
+ out->content[out->use] = 0;
+ if (ret == -1) ret = -3;
+
#ifdef DEBUG_ENCODING
switch (ret) {
case 0:
}
/**
+ * xmlCharEncFirstLineInput:
+ * @input: a parser input buffer
+ * @len: number of bytes to convert for the first line, or -1
+ *
+ * Front-end for the encoding handler input function, but handle only
+ * the very first line. Point is that this is based on autodetection
+ * of the encoding and once that first line is converted we may find
+ * out that a different decoder is needed to process the input.
+ *
+ * Returns the number of byte written if success, or
+ * -1 general error
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
+ * the result of transformation can't fit into the encoding we want), or
+ */
+int
+xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
+{
+ int ret;
+ size_t written;
+ size_t toconv;
+ int c_in;
+ int c_out;
+ xmlBufPtr in;
+ xmlBufPtr out;
+
+ if ((input == NULL) || (input->encoder == NULL) ||
+ (input->buffer == NULL) || (input->raw == NULL))
+ return (-1);
+ out = input->buffer;
+ in = input->raw;
+
+ toconv = xmlBufUse(in);
+ if (toconv == 0)
+ return (0);
+ written = xmlBufAvail(out) - 1; /* count '\0' */
+ /*
+ * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
+ * 45 chars should be sufficient to reach the end of the encoding
+ * declaration without going too far inside the document content.
+ * on UTF-16 this means 90bytes, on UCS4 this means 180
+ * The actual value depending on guessed encoding is passed as @len
+ * if provided
+ */
+ if (len >= 0) {
+ if (toconv > (unsigned int) len)
+ toconv = len;
+ } else {
+ if (toconv > 180)
+ toconv = 180;
+ }
+ if (toconv * 2 >= written) {
+ xmlBufGrow(out, toconv * 2);
+ written = xmlBufAvail(out) - 1;
+ }
+ if (written > 360)
+ written = 360;
+
+ c_in = toconv;
+ c_out = written;
+ ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
+ xmlBufContent(in), &c_in);
+ xmlBufShrink(in, c_in);
+ xmlBufAddLen(out, c_out);
+ if (ret == -1)
+ ret = -3;
+
+ switch (ret) {
+ case 0:
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,
+ "converted %d bytes to %d bytes of input\n",
+ c_in, c_out);
+#endif
+ break;
+ case -1:
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,
+ "converted %d bytes to %d bytes of input, %d left\n",
+ c_in, c_out, (int)xmlBufUse(in));
+#endif
+ break;
+ case -3:
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,
+ "converted %d bytes to %d bytes of input, %d left\n",
+ c_in, c_out, (int)xmlBufUse(in));
+#endif
+ break;
+ case -2: {
+ char buf[50];
+ const xmlChar *content = xmlBufContent(in);
+
+ snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
+ content[0], content[1],
+ content[2], content[3]);
+ buf[49] = 0;
+ xmlEncodingErr(XML_I18N_CONV_FAILED,
+ "input conversion failed due to input error, bytes %s\n",
+ buf);
+ }
+ }
+ /*
+ * Ignore when input buffer is not on a boundary
+ */
+ if (ret == -3) ret = 0;
+ if (ret == -1) ret = 0;
+ return(ret);
+}
+
+/**
+ * xmlCharEncInput:
+ * @input: a parser input buffer
+ * @flush: try to flush all the raw buffer
+ *
+ * Generic front-end for the encoding handler on parser input
+ *
+ * Returns the number of byte written if success, or
+ * -1 general error
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
+ * the result of transformation can't fit into the encoding we want), or
+ */
+int
+xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
+{
+ int ret;
+ size_t written;
+ size_t toconv;
+ int c_in;
+ int c_out;
+ xmlBufPtr in;
+ xmlBufPtr out;
+
+ if ((input == NULL) || (input->encoder == NULL) ||
+ (input->buffer == NULL) || (input->raw == NULL))
+ return (-1);
+ out = input->buffer;
+ in = input->raw;
+
+ toconv = xmlBufUse(in);
+ if (toconv == 0)
+ return (0);
+ if ((toconv > 64 * 1024) && (flush == 0))
+ toconv = 64 * 1024;
+ written = xmlBufAvail(out);
+ if (written > 0)
+ written--; /* count '\0' */
+ if (toconv * 2 >= written) {
+ xmlBufGrow(out, toconv * 2);
+ written = xmlBufAvail(out);
+ if (written > 0)
+ written--; /* count '\0' */
+ }
+ if ((written > 128 * 1024) && (flush == 0))
+ written = 128 * 1024;
+
+ c_in = toconv;
+ c_out = written;
+ ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
+ xmlBufContent(in), &c_in);
+ xmlBufShrink(in, c_in);
+ xmlBufAddLen(out, c_out);
+ if (ret == -1)
+ ret = -3;
+
+ switch (ret) {
+ case 0:
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,
+ "converted %d bytes to %d bytes of input\n",
+ c_in, c_out);
+#endif
+ break;
+ case -1:
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,
+ "converted %d bytes to %d bytes of input, %d left\n",
+ c_in, c_out, (int)xmlBufUse(in));
+#endif
+ break;
+ case -3:
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,
+ "converted %d bytes to %d bytes of input, %d left\n",
+ c_in, c_out, (int)xmlBufUse(in));
+#endif
+ break;
+ case -2: {
+ char buf[50];
+ const xmlChar *content = xmlBufContent(in);
+
+ snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
+ content[0], content[1],
+ content[2], content[3]);
+ buf[49] = 0;
+ xmlEncodingErr(XML_I18N_CONV_FAILED,
+ "input conversion failed due to input error, bytes %s\n",
+ buf);
+ }
+ }
+ /*
+ * Ignore when input buffer is not on a boundary
+ */
+ if (ret == -3)
+ ret = 0;
+ return (c_out? c_out : ret);
+}
+
+/**
* xmlCharEncInFunc:
* @handler: char encoding transformation data structure
* @out: an xmlBuffer for the output.
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
xmlBufferPtr in)
{
- int ret = -2;
+ int ret;
int written;
int toconv;
xmlBufferGrow(out, out->size + toconv * 2);
written = out->size - out->use - 1;
}
- if (handler->input != NULL) {
- ret = handler->input(&out->content[out->use], &written,
- in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- out->content[out->use] = 0;
- }
-#ifdef LIBXML_ICONV_ENABLED
- else if (handler->iconv_in != NULL) {
- ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
- &written, in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- out->content[out->use] = 0;
- if (ret == -1)
- ret = -3;
- }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
- else if (handler->uconv_in != NULL) {
- ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
- &written, in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- out->content[out->use] = 0;
- if (ret == -1)
- ret = -3;
- }
-#endif /* LIBXML_ICU_ENABLED */
+ ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
+ in->content, &toconv);
+ xmlBufferShrink(in, toconv);
+ out->use += written;
+ out->content[out->use] = 0;
+ if (ret == -1)
+ ret = -3;
+
switch (ret) {
case 0:
#ifdef DEBUG_ENCODING
return (written? written : ret);
}
+#ifdef LIBXML_OUTPUT_ENABLED
+/**
+ * xmlCharEncOutput:
+ * @output: a parser output buffer
+ * @init: is this an initialization call without data
+ *
+ * Generic front-end for the encoding handler on parser output
+ * a first call with @init == 1 has to be made first to initiate the
+ * output in case of non-stateless encoding needing to initiate their
+ * state or the output (like the BOM in UTF16).
+ * In case of UTF8 sequence conversion errors for the given encoder,
+ * the content will be automatically remapped to a CharRef sequence.
+ *
+ * Returns the number of byte written if success, or
+ * -1 general error
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
+ * the result of transformation can't fit into the encoding we want), or
+ */
+int
+xmlCharEncOutput(xmlOutputBufferPtr output, int init)
+{
+ int ret;
+ size_t written;
+ size_t writtentot = 0;
+ size_t toconv;
+ int c_in;
+ int c_out;
+ xmlBufPtr in;
+ xmlBufPtr out;
+
+ if ((output == NULL) || (output->encoder == NULL) ||
+ (output->buffer == NULL) || (output->conv == NULL))
+ return (-1);
+ out = output->conv;
+ in = output->buffer;
+
+retry:
+
+ written = xmlBufAvail(out);
+ if (written > 0)
+ written--; /* count '\0' */
+
+ /*
+ * First specific handling of the initialization call
+ */
+ if (init) {
+ c_in = 0;
+ c_out = written;
+ /* TODO: Check return value. */
+ xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
+ NULL, &c_in);
+ xmlBufAddLen(out, c_out);
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,
+ "initialized encoder\n");
+#endif
+ return(0);
+ }
+
+ /*
+ * Conversion itself.
+ */
+ toconv = xmlBufUse(in);
+ if (toconv == 0)
+ return (0);
+ if (toconv > 64 * 1024)
+ toconv = 64 * 1024;
+ if (toconv * 4 >= written) {
+ xmlBufGrow(out, toconv * 4);
+ written = xmlBufAvail(out) - 1;
+ }
+ if (written > 256 * 1024)
+ written = 256 * 1024;
+
+ c_in = toconv;
+ c_out = written;
+ ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
+ xmlBufContent(in), &c_in);
+ xmlBufShrink(in, c_in);
+ xmlBufAddLen(out, c_out);
+ writtentot += c_out;
+ if (ret == -1) {
+ if (c_out > 0) {
+ /* Can be a limitation of iconv or uconv */
+ goto retry;
+ }
+ ret = -3;
+ }
+
+ if (ret >= 0) output += ret;
+
+ /*
+ * Attempt to handle error cases
+ */
+ switch (ret) {
+ case 0:
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,
+ "converted %d bytes to %d bytes of output\n",
+ c_in, c_out);
+#endif
+ break;
+ case -1:
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,
+ "output conversion failed by lack of space\n");
+#endif
+ break;
+ case -3:
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
+ c_in, c_out, (int) xmlBufUse(in));
+#endif
+ break;
+ case -4:
+ xmlEncodingErr(XML_I18N_NO_OUTPUT,
+ "xmlCharEncOutFunc: no output function !\n", NULL);
+ ret = -1;
+ break;
+ case -2: {
+ xmlChar charref[20];
+ int len = (int) xmlBufUse(in);
+ xmlChar *content = xmlBufContent(in);
+ int cur, charrefLen;
+
+ cur = xmlGetUTF8Char(content, &len);
+ if (cur <= 0)
+ break;
+
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,
+ "handling output conversion error\n");
+ xmlGenericError(xmlGenericErrorContext,
+ "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+ content[0], content[1],
+ content[2], content[3]);
+#endif
+ /*
+ * Removes the UTF8 sequence, and replace it by a charref
+ * and continue the transcoding phase, hoping the error
+ * did not mangle the encoder state.
+ */
+ charrefLen = snprintf((char *) &charref[0], sizeof(charref),
+ "&#%d;", cur);
+ xmlBufShrink(in, len);
+ xmlBufGrow(out, charrefLen * 4);
+ c_out = xmlBufAvail(out) - 1;
+ c_in = charrefLen;
+ ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
+ charref, &c_in);
+
+ if ((ret < 0) || (c_in != charrefLen)) {
+ char buf[50];
+
+ snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
+ content[0], content[1],
+ content[2], content[3]);
+ buf[49] = 0;
+ xmlEncodingErr(XML_I18N_CONV_FAILED,
+ "output conversion failed due to conv error, bytes %s\n",
+ buf);
+ if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
+ content[0] = ' ';
+ break;
+ }
+
+ xmlBufAddLen(out, c_out);
+ writtentot += c_out;
+ goto retry;
+ }
+ }
+ return(ret);
+}
+#endif
+
/**
* xmlCharEncOutFunc:
* @handler: char enconding transformation data structure
int
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
xmlBufferPtr in) {
- int ret = -2;
+ int ret;
int written;
int writtentot = 0;
int toconv;
int output = 0;
- int charref_len = 0;
if (handler == NULL) return(-1);
if (out == NULL) return(-1);
*/
if (in == NULL) {
toconv = 0;
- if (handler->output != NULL) {
- ret = handler->output(&out->content[out->use], &written,
- NULL, &toconv);
- if (ret >= 0) { /* Gennady: check return value */
- out->use += written;
- out->content[out->use] = 0;
- }
- }
-#ifdef LIBXML_ICONV_ENABLED
- else if (handler->iconv_out != NULL) {
- ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
- &written, NULL, &toconv);
- out->use += written;
- out->content[out->use] = 0;
- }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
- else if (handler->uconv_out != NULL) {
- ret = xmlUconvWrapper(handler->uconv_out, 0,
- &out->content[out->use],
- &written, NULL, &toconv);
- out->use += written;
- out->content[out->use] = 0;
- }
-#endif /* LIBXML_ICU_ENABLED */
+ /* TODO: Check return value. */
+ xmlEncOutputChunk(handler, &out->content[out->use], &written,
+ NULL, &toconv);
+ out->use += written;
+ out->content[out->use] = 0;
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"initialized encoder\n");
xmlBufferGrow(out, toconv * 4);
written = out->size - out->use - 1;
}
- if (handler->output != NULL) {
- ret = handler->output(&out->content[out->use], &written,
- in->content, &toconv);
- if (written > 0) {
- xmlBufferShrink(in, toconv);
- out->use += written;
- writtentot += written;
- }
- out->content[out->use] = 0;
- }
-#ifdef LIBXML_ICONV_ENABLED
- else if (handler->iconv_out != NULL) {
- ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
- &written, in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- writtentot += written;
- out->content[out->use] = 0;
- if (ret == -1) {
- if (written > 0) {
- /*
- * Can be a limitation of iconv
- */
- charref_len = 0;
- goto retry;
- }
- ret = -3;
- }
- }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
- else if (handler->uconv_out != NULL) {
- ret = xmlUconvWrapper(handler->uconv_out, 0,
- &out->content[out->use],
- &written, in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- writtentot += written;
- out->content[out->use] = 0;
- if (ret == -1) {
- if (written > 0) {
- /*
- * Can be a limitation of iconv
- */
- charref_len = 0;
- goto retry;
- }
- ret = -3;
- }
- }
-#endif /* LIBXML_ICU_ENABLED */
- else {
- xmlEncodingErr(XML_I18N_NO_OUTPUT,
- "xmlCharEncOutFunc: no output function !\n", NULL);
- return(-1);
+ ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
+ in->content, &toconv);
+ xmlBufferShrink(in, toconv);
+ out->use += written;
+ writtentot += written;
+ out->content[out->use] = 0;
+ if (ret == -1) {
+ if (written > 0) {
+ /* Can be a limitation of iconv or uconv */
+ goto retry;
+ }
+ ret = -3;
}
if (ret >= 0) output += ret;
toconv, written, in->use);
#endif
break;
+ case -4:
+ xmlEncodingErr(XML_I18N_NO_OUTPUT,
+ "xmlCharEncOutFunc: no output function !\n", NULL);
+ ret = -1;
+ break;
case -2: {
+ xmlChar charref[20];
int len = in->use;
const xmlChar *utf = (const xmlChar *) in->content;
- int cur;
+ int cur, charrefLen;
cur = xmlGetUTF8Char(utf, &len);
- if ((charref_len != 0) && (written < charref_len)) {
- /*
- * We attempted to insert a character reference and failed.
- * Undo what was written and skip the remaining charref.
- */
- out->use -= written;
- writtentot -= written;
- xmlBufferShrink(in, charref_len - written);
- charref_len = 0;
-
- ret = -1;
+ if (cur <= 0)
break;
- } else if (cur > 0) {
- xmlChar charref[20];
#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "handling output conversion error\n");
- xmlGenericError(xmlGenericErrorContext,
- "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
- in->content[0], in->content[1],
- in->content[2], in->content[3]);
+ xmlGenericError(xmlGenericErrorContext,
+ "handling output conversion error\n");
+ xmlGenericError(xmlGenericErrorContext,
+ "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+ in->content[0], in->content[1],
+ in->content[2], in->content[3]);
#endif
- /*
- * Removes the UTF8 sequence, and replace it by a charref
- * and continue the transcoding phase, hoping the error
- * did not mangle the encoder state.
- */
- charref_len = snprintf((char *) &charref[0], sizeof(charref),
- "&#%d;", cur);
- xmlBufferShrink(in, len);
- xmlBufferAddHead(in, charref, -1);
-
- goto retry;
- } else {
+ /*
+ * Removes the UTF8 sequence, and replace it by a charref
+ * and continue the transcoding phase, hoping the error
+ * did not mangle the encoder state.
+ */
+ charrefLen = snprintf((char *) &charref[0], sizeof(charref),
+ "&#%d;", cur);
+ xmlBufferShrink(in, len);
+ xmlBufferGrow(out, charrefLen * 4);
+ written = out->size - out->use - 1;
+ toconv = charrefLen;
+ ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
+ charref, &toconv);
+
+ if ((ret < 0) || (toconv != charrefLen)) {
char buf[50];
snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
buf);
if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
in->content[0] = ' ';
+ break;
}
- break;
+
+ out->use += written;
+ writtentot += written;
+ out->content[out->use] = 0;
+ goto retry;
}
}
return(ret);
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
int ret = 0;
int tofree = 0;
+ int i, handler_in_list = 0;
+
if (handler == NULL) return(-1);
if (handler->name == NULL) return(-1);
+ if (handlers != NULL) {
+ for (i = 0;i < nbCharEncodingHandler; i++) {
+ if (handler == handlers[i]) {
+ handler_in_list = 1;
+ break;
+ }
+ }
+ }
#ifdef LIBXML_ICONV_ENABLED
/*
* Iconv handlers can be used only once, free the whole block.
* and the associated icon resources.
*/
- if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
+ if ((handler_in_list == 0) &&
+ ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
tofree = 1;
if (handler->iconv_out != NULL) {
if (iconv_close(handler->iconv_out))
}
#endif /* LIBXML_ICONV_ENABLED */
#ifdef LIBXML_ICU_ENABLED
- if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
+ if ((handler_in_list == 0) &&
+ ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
tofree = 1;
if (handler->uconv_out != NULL) {
closeIcuConverter(handler->uconv_out);
int ret;
- if (handler->output != NULL) {
- do {
- toconv = in->end - cur;
- written = 32000;
- ret = handler->output(&convbuf[0], &written,
- cur, &toconv);
- if (ret == -1) return(-1);
- unused += written;
- cur += toconv;
- } while (ret == -2);
-#ifdef LIBXML_ICONV_ENABLED
- } else if (handler->iconv_out != NULL) {
- do {
- toconv = in->end - cur;
- written = 32000;
- ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
- &written, cur, &toconv);
- if (ret < 0) {
- if (written > 0)
- ret = -2;
- else
- return(-1);
- }
- unused += written;
- cur += toconv;
- } while (ret == -2);
-#endif
-#ifdef LIBXML_ICU_ENABLED
- } else if (handler->uconv_out != NULL) {
- do {
- toconv = in->end - cur;
- written = 32000;
- ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
- &written, cur, &toconv);
- if (ret < 0) {
- if (written > 0)
- ret = -2;
- else
- return(-1);
- }
- unused += written;
- cur += toconv;
- } while (ret == -2);
-#endif
- } else {
- /* could not find a converter */
- return(-1);
- }
+ do {
+ toconv = in->end - cur;
+ written = 32000;
+ ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
+ cur, &toconv);
+ if (ret < 0) {
+ if (written > 0)
+ ret = -2;
+ else
+ return(-1);
+ }
+ unused += written;
+ cur += toconv;
+ } while (ret == -2);
}
if (in->buf->rawconsumed < unused)
return(-1);
c2 = c2 & 0x3F;
d = d & 0x0F;
d = xlattable [48 + c2 + xlattable [48 + c1 +
- xlattable [32 + d] * 64] * 64];
+ xlattable [32 + d] * 64] * 64];
if (d == 0) {
/* not in character set */
*outlen = out - outstart;