2 * encoding.c : implements the encoding conversion functions needed for XML
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
16 * See Copyright for the status of this software.
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
35 #ifdef LIBXML_ICONV_ENABLED
40 #include <libxml/encoding.h>
41 #include <libxml/xmlmemory.h>
42 #ifdef LIBXML_HTML_ENABLED
43 #include <libxml/HTMLparser.h>
45 #include <libxml/globals.h>
46 #include <libxml/xmlerror.h>
51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
55 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
56 struct _xmlCharEncodingAlias {
61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62 static int xmlCharEncodingAliasesNb = 0;
63 static int xmlCharEncodingAliasesMax = 0;
65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
67 #define DEBUG_ENCODING /* Define this to get encoding traces */
70 #ifdef LIBXML_ISO8859X_ENABLED
71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
75 static int xmlLittleEndian = 1;
78 * xmlEncodingErrMemory:
79 * @extra: extra information
81 * Handle an out of memory condition
84 xmlEncodingErrMemory(const char *extra)
86 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
91 * @error: the error number
92 * @msg: the error message
96 static void LIBXML_ATTR_FORMAT(2,0)
97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
99 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100 XML_FROM_I18N, error, XML_ERR_FATAL,
101 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
104 #ifdef LIBXML_ICU_ENABLED
106 openIcuConverter(const char* name, int toUnicode)
108 UErrorCode status = U_ZERO_ERROR;
109 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
113 conv->uconv = ucnv_open(name, &status);
114 if (U_FAILURE(status))
117 status = U_ZERO_ERROR;
119 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
120 NULL, NULL, NULL, &status);
123 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
124 NULL, NULL, NULL, &status);
126 if (U_FAILURE(status))
129 status = U_ZERO_ERROR;
130 conv->utf8 = ucnv_open("UTF-8", &status);
131 if (U_SUCCESS(status))
136 ucnv_close(conv->uconv);
142 closeIcuConverter(uconv_t *conv)
145 ucnv_close(conv->uconv);
146 ucnv_close(conv->utf8);
150 #endif /* LIBXML_ICU_ENABLED */
152 /************************************************************************
154 * Conversions To/From UTF8 encoding *
156 ************************************************************************/
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  on entry the capacity of @out in bytes, on return the number
 *           of bytes written to @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  on entry the length of @in in bytes, on return the number of
 *          bytes of @in that were converted
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a strict subset of UTF-8, so every valid
 * input byte is copied unchanged and needs exactly one output byte.
 * Conversion stops early, without error, when @out is full.
 *
 * Returns the number of bytes written to @out, or -1 if an argument is
 *     NULL or if a byte outside of the ASCII range (>= 0x80) is found.
 *     In the latter case @outlen and @inlen describe what was converted
 *     before the offending byte.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *outstart = out;
    unsigned char *outend;
    const unsigned char *instart = in;
    const unsigned char *inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + *inlen;

    /*
     * One output byte per input byte: the only space requirement is a
     * single free byte, so no extra slack is demanded from the caller.
     */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not an ASCII char: report what was converted so far */
            *outlen = (int) (out - outstart);
            *inlen = (int) (in - instart);
            return(-1);
        }
        *out++ = *in++;
    }

    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return(*outlen);
}
203 #ifdef LIBXML_OUTPUT_ENABLED
206 * @out: a pointer to an array of bytes to store the result
207 * @outlen: the length of @out
208 * @in: a pointer to an array of UTF-8 chars
209 * @inlen: the length of @in
211 * Take a block of UTF-8 chars in and try to convert it to an ASCII
212 * block of chars out.
214 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
215 * The value of @inlen after return is the number of octets consumed
216 * if the return value is positive, else unpredictable.
217 * The value of @outlen after return is the number of octets produced.
220 UTF8Toascii(unsigned char* out, int *outlen,
221 const unsigned char* in, int *inlen) {
222 const unsigned char* processed = in;
223 const unsigned char* outend;
224 const unsigned char* outstart = out;
225 const unsigned char* instart = in;
226 const unsigned char* inend;
230 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
233 * initialization nothing to do
239 inend = in + (*inlen);
240 outend = out + (*outlen);
243 if (d < 0x80) { c= d; trailing= 0; }
245 /* trailing byte in leading position */
246 *outlen = out - outstart;
247 *inlen = processed - instart;
249 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
250 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
251 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
253 /* no chance for this in Ascii */
254 *outlen = out - outstart;
255 *inlen = processed - instart;
259 if (inend - in < trailing) {
263 for ( ; trailing; trailing--) {
264 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
270 /* assertion: c is a single UCS-4 value */
276 /* no chance for this in Ascii */
277 *outlen = out - outstart;
278 *inlen = processed - instart;
283 *outlen = out - outstart;
284 *inlen = processed - instart;
287 #endif /* LIBXML_OUTPUT_ENABLED */
291 * @out: a pointer to an array of bytes to store the result
292 * @outlen: the length of @out
293 * @in: a pointer to an array of ISO Latin 1 chars
294 * @inlen: the length of @in
296 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
297 * block of chars out.
298 * Returns the number of bytes written if success, or -1 otherwise
299 * The value of @inlen after return is the number of octets consumed
300 * if the return value is positive, else unpredictable.
301 * The value of @outlen after return is the number of octets produced.
304 isolat1ToUTF8(unsigned char* out, int *outlen,
305 const unsigned char* in, int *inlen) {
306 unsigned char* outstart = out;
307 const unsigned char* base = in;
308 unsigned char* outend;
309 const unsigned char* inend;
310 const unsigned char* instop;
312 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
315 outend = out + *outlen;
316 inend = in + (*inlen);
319 while ((in < inend) && (out < outend - 1)) {
321 *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
322 *out++ = ((*in) & 0x3F) | 0x80;
325 if ((instop - in) > (outend - out)) instop = in + (outend - out);
326 while ((in < instop) && (*in < 0x80)) {
330 if ((in < inend) && (out < outend) && (*in < 0x80)) {
333 *outlen = out - outstart;
340 * @out: a pointer to an array of bytes to store the result
341 * @outlen: the length of @out
342 * @inb: a pointer to an array of UTF-8 chars
343 * @inlenb: the length of @in in UTF-8 chars
345 * No op copy operation for UTF8 handling.
347 * Returns the number of bytes written, or -1 if lack of space.
348 * The value of *inlen after return is the number of octets consumed
349 * if the return value is positive, else unpredictable.
352 UTF8ToUTF8(unsigned char* out, int *outlen,
353 const unsigned char* inb, int *inlenb)
357 if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
360 /* inb == NULL means output is initialized. */
365 if (*outlen > *inlenb) {
373 memcpy(out, inb, len);
381 #ifdef LIBXML_OUTPUT_ENABLED
384 * @out: a pointer to an array of bytes to store the result
385 * @outlen: the length of @out
386 * @in: a pointer to an array of UTF-8 chars
387 * @inlen: the length of @in
389 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
390 * block of chars out.
392 * Returns the number of bytes written if success, -2 if the transcoding fails,
394 * The value of @inlen after return is the number of octets consumed
395 * if the return value is positive, else unpredictable.
396 * The value of @outlen after return is the number of octets produced.
399 UTF8Toisolat1(unsigned char* out, int *outlen,
400 const unsigned char* in, int *inlen) {
401 const unsigned char* processed = in;
402 const unsigned char* outend;
403 const unsigned char* outstart = out;
404 const unsigned char* instart = in;
405 const unsigned char* inend;
409 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
412 * initialization nothing to do
418 inend = in + (*inlen);
419 outend = out + (*outlen);
422 if (d < 0x80) { c= d; trailing= 0; }
424 /* trailing byte in leading position */
425 *outlen = out - outstart;
426 *inlen = processed - instart;
428 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
429 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
430 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
432 /* no chance for this in IsoLat1 */
433 *outlen = out - outstart;
434 *inlen = processed - instart;
438 if (inend - in < trailing) {
442 for ( ; trailing; trailing--) {
445 if (((d= *in++) & 0xC0) != 0x80) {
446 *outlen = out - outstart;
447 *inlen = processed - instart;
454 /* assertion: c is a single UCS-4 value */
460 /* no chance for this in IsoLat1 */
461 *outlen = out - outstart;
462 *inlen = processed - instart;
467 *outlen = out - outstart;
468 *inlen = processed - instart;
471 #endif /* LIBXML_OUTPUT_ENABLED */
475 * @out: a pointer to an array of bytes to store the result
476 * @outlen: the length of @out
477 * @inb: a pointer to an array of UTF-16LE passed as a byte array
478 * @inlenb: the length of @inb in bytes
480 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
481 * block of chars out. This function assumes the endian property
482 * is the same between the native type of this machine and the
485 * Returns the number of bytes written, or -1 if lack of space, or -2
486 * if the transcoding fails (if *in is not a valid utf16 string)
487 * The value of *inlen after return is the number of octets consumed
488 * if the return value is positive, else unpredictable.
491 UTF16LEToUTF8(unsigned char* out, int *outlen,
492 const unsigned char* inb, int *inlenb)
494 unsigned char* outstart = out;
495 const unsigned char* processed = inb;
496 unsigned char* outend = out + *outlen;
497 unsigned short* in = (unsigned short*) inb;
498 unsigned short* inend;
499 unsigned int c, d, inlen;
503 if ((*inlenb % 2) == 1)
507 while ((in < inend) && (out - outstart + 5 < *outlen)) {
508 if (xmlLittleEndian) {
511 tmp = (unsigned char *) in;
513 c = c | (((unsigned int)*tmp) << 8);
516 if ((c & 0xFC00) == 0xD800) { /* surrogates */
517 if (in >= inend) { /* (in > inend) shouldn't happens */
520 if (xmlLittleEndian) {
523 tmp = (unsigned char *) in;
525 d = d | (((unsigned int)*tmp) << 8);
528 if ((d & 0xFC00) == 0xDC00) {
535 *outlen = out - outstart;
536 *inlenb = processed - inb;
541 /* assertion: c is a single UCS-4 value */
544 if (c < 0x80) { *out++= c; bits= -6; }
545 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
546 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
547 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
549 for ( ; bits >= 0; bits-= 6) {
552 *out++= ((c >> bits) & 0x3F) | 0x80;
554 processed = (const unsigned char*) in;
556 *outlen = out - outstart;
557 *inlenb = processed - inb;
561 #ifdef LIBXML_OUTPUT_ENABLED
564 * @outb: a pointer to an array of bytes to store the result
565 * @outlen: the length of @outb
566 * @in: a pointer to an array of UTF-8 chars
567 * @inlen: the length of @in
569 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
570 * block of chars out.
572 * Returns the number of bytes written, or -1 if lack of space, or -2
573 * if the transcoding failed.
576 UTF8ToUTF16LE(unsigned char* outb, int *outlen,
577 const unsigned char* in, int *inlen)
579 unsigned short* out = (unsigned short*) outb;
580 const unsigned char* processed = in;
581 const unsigned char *const instart = in;
582 unsigned short* outstart= out;
583 unsigned short* outend;
584 const unsigned char* inend;
588 unsigned short tmp1, tmp2;
590 /* UTF16LE encoding has no BOM */
591 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
598 outend = out + (*outlen / 2);
601 if (d < 0x80) { c= d; trailing= 0; }
603 /* trailing byte in leading position */
604 *outlen = (out - outstart) * 2;
605 *inlen = processed - instart;
607 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
608 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
609 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
611 /* no chance for this in UTF-16 */
612 *outlen = (out - outstart) * 2;
613 *inlen = processed - instart;
617 if (inend - in < trailing) {
621 for ( ; trailing; trailing--) {
622 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
628 /* assertion: c is a single UCS-4 value */
632 if (xmlLittleEndian) {
635 tmp = (unsigned char *) out;
637 *(tmp + 1) = c >> 8 ;
641 else if (c < 0x110000) {
645 if (xmlLittleEndian) {
646 *out++ = 0xD800 | (c >> 10);
647 *out++ = 0xDC00 | (c & 0x03FF);
649 tmp1 = 0xD800 | (c >> 10);
650 tmp = (unsigned char *) out;
651 *tmp = (unsigned char) tmp1;
652 *(tmp + 1) = tmp1 >> 8;
655 tmp2 = 0xDC00 | (c & 0x03FF);
656 tmp = (unsigned char *) out;
657 *tmp = (unsigned char) tmp2;
658 *(tmp + 1) = tmp2 >> 8;
666 *outlen = (out - outstart) * 2;
667 *inlen = processed - instart;
673 * @outb: a pointer to an array of bytes to store the result
674 * @outlen: the length of @outb
675 * @in: a pointer to an array of UTF-8 chars
676 * @inlen: the length of @in
678 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
679 * block of chars out.
681 * Returns the number of bytes written, or -1 if lack of space, or -2
682 * if the transcoding failed.
685 UTF8ToUTF16(unsigned char* outb, int *outlen,
686 const unsigned char* in, int *inlen)
690 * initialization, add the Byte Order Mark for UTF-16LE
697 #ifdef DEBUG_ENCODING
698 xmlGenericError(xmlGenericErrorContext,
699 "Added FFFE Byte Order Mark\n");
707 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
709 #endif /* LIBXML_OUTPUT_ENABLED */
713 * @out: a pointer to an array of bytes to store the result
714 * @outlen: the length of @out
715 * @inb: a pointer to an array of UTF-16 passed as a byte array
716 * @inlenb: the length of @inb in bytes
718 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
719 * block of chars out. This function assumes the endian property
720 * is the same between the native type of this machine and the
723 * Returns the number of bytes written, or -1 if lack of space, or -2
724 * if the transcoding fails (if *in is not a valid utf16 string)
725 * The value of *inlen after return is the number of octets consumed
726 * if the return value is positive, else unpredictable.
729 UTF16BEToUTF8(unsigned char* out, int *outlen,
730 const unsigned char* inb, int *inlenb)
732 unsigned char* outstart = out;
733 const unsigned char* processed = inb;
734 unsigned char* outend = out + *outlen;
735 unsigned short* in = (unsigned short*) inb;
736 unsigned short* inend;
737 unsigned int c, d, inlen;
741 if ((*inlenb % 2) == 1)
746 if (xmlLittleEndian) {
747 tmp = (unsigned char *) in;
750 c = c | (unsigned int) *tmp;
755 if ((c & 0xFC00) == 0xD800) { /* surrogates */
756 if (in >= inend) { /* (in > inend) shouldn't happens */
757 *outlen = out - outstart;
758 *inlenb = processed - inb;
761 if (xmlLittleEndian) {
762 tmp = (unsigned char *) in;
765 d = d | (unsigned int) *tmp;
770 if ((d & 0xFC00) == 0xDC00) {
777 *outlen = out - outstart;
778 *inlenb = processed - inb;
783 /* assertion: c is a single UCS-4 value */
786 if (c < 0x80) { *out++= c; bits= -6; }
787 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
788 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
789 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
791 for ( ; bits >= 0; bits-= 6) {
794 *out++= ((c >> bits) & 0x3F) | 0x80;
796 processed = (const unsigned char*) in;
798 *outlen = out - outstart;
799 *inlenb = processed - inb;
803 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  on entry the capacity of @outb in bytes, on return the number
 *           of bytes written to @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL to only initialize
 *       the output (UTF-16BE has no BOM, so nothing is written)
 * @inlen:  on entry the length of @in in bytes, on return the number of
 *          bytes of @in that were converted
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. Conversion stops early, without error, when the
 * input ends in the middle of a sequence or when @outb cannot hold the
 * next complete character.
 *
 * Returns the number of bytes written, or -1 if a required argument is
 *     NULL, or -2 if the transcoding failed because of an invalid lead
 *     byte. On every return path @outlen is expressed in bytes.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char *out = outb;
    const unsigned char *processed = in;
    const unsigned char *const instart = in;
    const unsigned char *inend;
    unsigned int c, d;
    unsigned int high, low;
    int trailing;
    int room;       /* bytes of @outb usable for whole 16-bit units */

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* an odd trailing byte can never hold a code unit, ignore it */
    room = (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = (int) (out - outb);
            *inlen = (int) (processed - instart);
            return(-2);
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            *outlen = (int) (out - outb);
            *inlen = (int) (processed - instart);
            return(-2);
        }

        /* incomplete sequence at the end of the input: wait for more */
        if (inend - in < trailing) {
            break;
        }

        /*
         * NOTE(review): as in the previous implementation, a malformed
         * continuation byte only stops the accumulation; it is not
         * reported as an error.
         */
        for ( ; trailing; trailing--) {
            if ((in >= inend) || (((d = *in++) & 0xC0) != 0x80)) break;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (room - (int) (out - outb) < 2) break;
            /* big endian: most significant byte first */
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) c;
        }
        else if (c < 0x110000) {
            /* a surrogate pair needs two code units */
            if (room - (int) (out - outb) < 4) break;
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (high >> 8);
            *out++ = (unsigned char) high;
            *out++ = (unsigned char) (low >> 8);
            *out++ = (unsigned char) low;
        }
        else
            break;
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);
}
909 #endif /* LIBXML_OUTPUT_ENABLED */
911 /************************************************************************
913 * Generic encoding handling routines *
915 ************************************************************************/
918 * xmlDetectCharEncoding:
919 * @in: a pointer to the first bytes of the XML entity, must be at least
920 * 2 bytes long (at least 4 if encoding is UCS4 variant).
921 * @len: the length of the buffer in bytes
923 * Guess the encoding of the entity using the first bytes of the entity content
924 * according to the non-normative appendix F of the XML-1.0 recommendation.
926 * Returns one of the XML_CHAR_ENCODING_... values.
929 xmlDetectCharEncoding(const unsigned char* in, int len)
932 return(XML_CHAR_ENCODING_NONE);
934 if ((in[0] == 0x00) && (in[1] == 0x00) &&
935 (in[2] == 0x00) && (in[3] == 0x3C))
936 return(XML_CHAR_ENCODING_UCS4BE);
937 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
938 (in[2] == 0x00) && (in[3] == 0x00))
939 return(XML_CHAR_ENCODING_UCS4LE);
940 if ((in[0] == 0x00) && (in[1] == 0x00) &&
941 (in[2] == 0x3C) && (in[3] == 0x00))
942 return(XML_CHAR_ENCODING_UCS4_2143);
943 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
944 (in[2] == 0x00) && (in[3] == 0x00))
945 return(XML_CHAR_ENCODING_UCS4_3412);
946 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
947 (in[2] == 0xA7) && (in[3] == 0x94))
948 return(XML_CHAR_ENCODING_EBCDIC);
949 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
950 (in[2] == 0x78) && (in[3] == 0x6D))
951 return(XML_CHAR_ENCODING_UTF8);
953 * Although not part of the recommendation, we also
954 * attempt an "auto-recognition" of UTF-16LE and
955 * UTF-16BE encodings.
957 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
958 (in[2] == 0x3F) && (in[3] == 0x00))
959 return(XML_CHAR_ENCODING_UTF16LE);
960 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
961 (in[2] == 0x00) && (in[3] == 0x3F))
962 return(XML_CHAR_ENCODING_UTF16BE);
966 * Errata on XML-1.0 June 20 2001
967 * We now allow an UTF8 encoded BOM
969 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
971 return(XML_CHAR_ENCODING_UTF8);
973 /* For UTF-16 we can recognize by the BOM */
975 if ((in[0] == 0xFE) && (in[1] == 0xFF))
976 return(XML_CHAR_ENCODING_UTF16BE);
977 if ((in[0] == 0xFF) && (in[1] == 0xFE))
978 return(XML_CHAR_ENCODING_UTF16LE);
980 return(XML_CHAR_ENCODING_NONE);
984 * xmlCleanupEncodingAliases:
986 * Unregisters all aliases
989 xmlCleanupEncodingAliases(void) {
992 if (xmlCharEncodingAliases == NULL)
995 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
996 if (xmlCharEncodingAliases[i].name != NULL)
997 xmlFree((char *) xmlCharEncodingAliases[i].name);
998 if (xmlCharEncodingAliases[i].alias != NULL)
999 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1001 xmlCharEncodingAliasesNb = 0;
1002 xmlCharEncodingAliasesMax = 0;
1003 xmlFree(xmlCharEncodingAliases);
1004 xmlCharEncodingAliases = NULL;
1008 * xmlGetEncodingAlias:
1009 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1011 * Lookup an encoding name for the given alias.
1013 * Returns NULL if not found, otherwise the original name
1016 xmlGetEncodingAlias(const char *alias) {
1023 if (xmlCharEncodingAliases == NULL)
1026 for (i = 0;i < 99;i++) {
1027 upper[i] = toupper(alias[i]);
1028 if (upper[i] == 0) break;
1033 * Walk down the list looking for a definition of the alias
1035 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1036 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1037 return(xmlCharEncodingAliases[i].name);
1044 * xmlAddEncodingAlias:
1045 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1046 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1048 * Registers an alias @alias for an encoding named @name. Existing alias
1049 * will be overwritten.
1051 * Returns 0 in case of success, -1 in case of error
1054 xmlAddEncodingAlias(const char *name, const char *alias) {
1058 if ((name == NULL) || (alias == NULL))
1061 for (i = 0;i < 99;i++) {
1062 upper[i] = toupper(alias[i]);
1063 if (upper[i] == 0) break;
1067 if (xmlCharEncodingAliases == NULL) {
1068 xmlCharEncodingAliasesNb = 0;
1069 xmlCharEncodingAliasesMax = 20;
1070 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1071 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1072 if (xmlCharEncodingAliases == NULL)
1074 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1075 xmlCharEncodingAliasesMax *= 2;
1076 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1077 xmlRealloc(xmlCharEncodingAliases,
1078 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1081 * Walk down the list looking for a definition of the alias
1083 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1084 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1086 * Replace the definition.
1088 xmlFree((char *) xmlCharEncodingAliases[i].name);
1089 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1094 * Add the definition
1096 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1097 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1098 xmlCharEncodingAliasesNb++;
1103 * xmlDelEncodingAlias:
1104 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1106 * Unregisters an encoding alias @alias
1108 * Returns 0 in case of success, -1 in case of error
1111 xmlDelEncodingAlias(const char *alias) {
1117 if (xmlCharEncodingAliases == NULL)
1120 * Walk down the list looking for a definition of the alias
1122 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1123 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1124 xmlFree((char *) xmlCharEncodingAliases[i].name);
1125 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1126 xmlCharEncodingAliasesNb--;
1127 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1128 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1136 * xmlParseCharEncoding:
1137 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1139 * Compare the string to the encoding schemes already known. Note
1140 * that the comparison is case insensitive, in accordance with section
1141 * [XML] 4.3.3 Character Encoding in Entities.
1143 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1144 * if not recognized.
1147 xmlParseCharEncoding(const char* name)
1154 return(XML_CHAR_ENCODING_NONE);
1157 * Do the alias resolution
1159 alias = xmlGetEncodingAlias(name);
1163 for (i = 0;i < 499;i++) {
1164 upper[i] = toupper(name[i]);
1165 if (upper[i] == 0) break;
1169 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1170 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1171 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1174 * NOTE: if we were able to parse this, the endianness of UTF16 is
1175 * already found and in use
1177 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1178 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1180 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1181 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1182 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1185 * NOTE: if we were able to parse this, the endianness of UCS4 is
1186 * already found and in use
1188 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1189 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1190 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1193 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1194 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1195 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1197 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1198 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1199 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1201 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1202 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1203 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1204 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1205 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1206 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1207 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1209 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1210 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1211 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1213 #ifdef DEBUG_ENCODING
1214 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1216 return(XML_CHAR_ENCODING_ERROR);
1220 * xmlGetCharEncodingName:
1221 * @enc: the encoding
1223 * The "canonical" name for XML encoding.
1224 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1225 * Section 4.3.3 Character Encoding in Entities
1227 * Returns the canonical name for the given encoding
1231 xmlGetCharEncodingName(xmlCharEncoding enc) {
1233 case XML_CHAR_ENCODING_ERROR:
1235 case XML_CHAR_ENCODING_NONE:
1237 case XML_CHAR_ENCODING_UTF8:
1239 case XML_CHAR_ENCODING_UTF16LE:
1241 case XML_CHAR_ENCODING_UTF16BE:
1243 case XML_CHAR_ENCODING_EBCDIC:
1245 case XML_CHAR_ENCODING_UCS4LE:
1246 return("ISO-10646-UCS-4");
1247 case XML_CHAR_ENCODING_UCS4BE:
1248 return("ISO-10646-UCS-4");
1249 case XML_CHAR_ENCODING_UCS4_2143:
1250 return("ISO-10646-UCS-4");
1251 case XML_CHAR_ENCODING_UCS4_3412:
1252 return("ISO-10646-UCS-4");
1253 case XML_CHAR_ENCODING_UCS2:
1254 return("ISO-10646-UCS-2");
1255 case XML_CHAR_ENCODING_8859_1:
1256 return("ISO-8859-1");
1257 case XML_CHAR_ENCODING_8859_2:
1258 return("ISO-8859-2");
1259 case XML_CHAR_ENCODING_8859_3:
1260 return("ISO-8859-3");
1261 case XML_CHAR_ENCODING_8859_4:
1262 return("ISO-8859-4");
1263 case XML_CHAR_ENCODING_8859_5:
1264 return("ISO-8859-5");
1265 case XML_CHAR_ENCODING_8859_6:
1266 return("ISO-8859-6");
1267 case XML_CHAR_ENCODING_8859_7:
1268 return("ISO-8859-7");
1269 case XML_CHAR_ENCODING_8859_8:
1270 return("ISO-8859-8");
1271 case XML_CHAR_ENCODING_8859_9:
1272 return("ISO-8859-9");
1273 case XML_CHAR_ENCODING_2022_JP:
1274 return("ISO-2022-JP");
1275 case XML_CHAR_ENCODING_SHIFT_JIS:
1276 return("Shift-JIS");
1277 case XML_CHAR_ENCODING_EUC_JP:
1279 case XML_CHAR_ENCODING_ASCII:
1285 /************************************************************************
1287 * Char encoding handlers *
1289 ************************************************************************/
1292 /* the size should be growable, but it's not a big deal ... */
1293 #define MAX_ENCODING_HANDLERS 50
1294 static xmlCharEncodingHandlerPtr *handlers = NULL;
1295 static int nbCharEncodingHandler = 0;
1298 * The default is UTF-8 for XML, that's also the default used for the
1299 * parser internals, so the default encoding handler is NULL
1302 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1305 * xmlNewCharEncodingHandler:
1306 * @name: the encoding name, in UTF-8 format (ASCII actually)
1307 * @input: the xmlCharEncodingInputFunc to read that encoding
1308 * @output: the xmlCharEncodingOutputFunc to write that encoding
1310 * Creates and registers an xmlCharEncodingHandler.
1312 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1314 xmlCharEncodingHandlerPtr
1315 xmlNewCharEncodingHandler(const char *name,
1316 xmlCharEncodingInputFunc input,
1317 xmlCharEncodingOutputFunc output) {
1318 xmlCharEncodingHandlerPtr handler;
1325 * Do the alias resolution
1327 alias = xmlGetEncodingAlias(name);
1332 * Keep only the uppercase version of the encoding.
1335 xmlEncodingErr(XML_I18N_NO_NAME,
1336 "xmlNewCharEncodingHandler : no name !\n", NULL);
1339 for (i = 0;i < 499;i++) {
1340 upper[i] = toupper(name[i]);
1341 if (upper[i] == 0) break;
1344 up = xmlMemStrdup(upper);
1346 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1351 * allocate and fill in a handler block.
1353 handler = (xmlCharEncodingHandlerPtr)
1354 xmlMalloc(sizeof(xmlCharEncodingHandler));
1355 if (handler == NULL) {
1357 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1360 memset(handler, 0, sizeof(xmlCharEncodingHandler));
1361 handler->input = input;
1362 handler->output = output;
1365 #ifdef LIBXML_ICONV_ENABLED
1366 handler->iconv_in = NULL;
1367 handler->iconv_out = NULL;
1369 #ifdef LIBXML_ICU_ENABLED
1370 handler->uconv_in = NULL;
1371 handler->uconv_out = NULL;
1375 * registers and returns the handler.
1377 xmlRegisterCharEncodingHandler(handler);
1378 #ifdef DEBUG_ENCODING
1379 xmlGenericError(xmlGenericErrorContext,
1380 "Registered encoding handler for %s\n", name);
1386 * xmlInitCharEncodingHandlers:
1388 * Initialize the char encoding support, it registers the default
1389 * encoding supported.
1390 * NOTE: while public, this function usually doesn't need to be called
1391 * in normal processing.
1394 xmlInitCharEncodingHandlers(void) {
1395 unsigned short int tst = 0x1234;
1396 unsigned char *ptr = (unsigned char *) &tst;
1398 if (handlers != NULL) return;
1400 handlers = (xmlCharEncodingHandlerPtr *)
1401 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1403 if (*ptr == 0x12) xmlLittleEndian = 0;
1404 else if (*ptr == 0x34) xmlLittleEndian = 1;
1406 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1407 "Odd problem at endianness detection\n", NULL);
1410 if (handlers == NULL) {
1411 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1414 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1415 #ifdef LIBXML_OUTPUT_ENABLED
1417 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1419 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1420 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1421 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1422 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1423 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1424 #ifdef LIBXML_HTML_ENABLED
1425 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1429 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1431 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1432 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1433 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1434 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1435 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1436 #endif /* LIBXML_OUTPUT_ENABLED */
1437 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1438 #ifdef LIBXML_ISO8859X_ENABLED
1439 xmlRegisterCharEncodingHandlersISO8859x ();
1446 * xmlCleanupCharEncodingHandlers:
1448 * Cleanup the memory allocated for the char encoding support, it
1449 * unregisters all the encoding handlers and the aliases.
1452 xmlCleanupCharEncodingHandlers(void) {
1453 xmlCleanupEncodingAliases();
1455 if (handlers == NULL) return;
1457 for (;nbCharEncodingHandler > 0;) {
1458 nbCharEncodingHandler--;
1459 if (handlers[nbCharEncodingHandler] != NULL) {
1460 if (handlers[nbCharEncodingHandler]->name != NULL)
1461 xmlFree(handlers[nbCharEncodingHandler]->name);
1462 xmlFree(handlers[nbCharEncodingHandler]);
1467 nbCharEncodingHandler = 0;
1468 xmlDefaultCharEncodingHandler = NULL;
1472 * xmlRegisterCharEncodingHandler:
1473 * @handler: the xmlCharEncodingHandlerPtr handler block
1475 * Register the char encoding handler, surprising, isn't it ?
1478 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1479 if (handlers == NULL) xmlInitCharEncodingHandlers();
1480 if ((handler == NULL) || (handlers == NULL)) {
1481 xmlEncodingErr(XML_I18N_NO_HANDLER,
1482 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1486 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1487 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1488 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1489 "MAX_ENCODING_HANDLERS");
1492 handlers[nbCharEncodingHandler++] = handler;
1496 * xmlGetCharEncodingHandler:
1497 * @enc: an xmlCharEncoding value.
1499 * Search in the registered set the handler able to read/write that encoding.
1501 * Returns the handler or NULL if not found
1503 xmlCharEncodingHandlerPtr
1504 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1505 xmlCharEncodingHandlerPtr handler;
1507 if (handlers == NULL) xmlInitCharEncodingHandlers();
1509 case XML_CHAR_ENCODING_ERROR:
1511 case XML_CHAR_ENCODING_NONE:
1513 case XML_CHAR_ENCODING_UTF8:
1515 case XML_CHAR_ENCODING_UTF16LE:
1516 return(xmlUTF16LEHandler);
1517 case XML_CHAR_ENCODING_UTF16BE:
1518 return(xmlUTF16BEHandler);
1519 case XML_CHAR_ENCODING_EBCDIC:
1520 handler = xmlFindCharEncodingHandler("EBCDIC");
1521 if (handler != NULL) return(handler);
1522 handler = xmlFindCharEncodingHandler("ebcdic");
1523 if (handler != NULL) return(handler);
1524 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1525 if (handler != NULL) return(handler);
1526 handler = xmlFindCharEncodingHandler("IBM-037");
1527 if (handler != NULL) return(handler);
1529 case XML_CHAR_ENCODING_UCS4BE:
1530 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1531 if (handler != NULL) return(handler);
1532 handler = xmlFindCharEncodingHandler("UCS-4");
1533 if (handler != NULL) return(handler);
1534 handler = xmlFindCharEncodingHandler("UCS4");
1535 if (handler != NULL) return(handler);
1537 case XML_CHAR_ENCODING_UCS4LE:
1538 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1539 if (handler != NULL) return(handler);
1540 handler = xmlFindCharEncodingHandler("UCS-4");
1541 if (handler != NULL) return(handler);
1542 handler = xmlFindCharEncodingHandler("UCS4");
1543 if (handler != NULL) return(handler);
1545 case XML_CHAR_ENCODING_UCS4_2143:
1547 case XML_CHAR_ENCODING_UCS4_3412:
1549 case XML_CHAR_ENCODING_UCS2:
1550 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1551 if (handler != NULL) return(handler);
1552 handler = xmlFindCharEncodingHandler("UCS-2");
1553 if (handler != NULL) return(handler);
1554 handler = xmlFindCharEncodingHandler("UCS2");
1555 if (handler != NULL) return(handler);
1559 * We used to keep ISO Latin encodings native in the
1560 * generated data. This led to so many problems that
1561 * this has been removed. One can still change this
1562 * back by registering no-ops encoders for those
1564 case XML_CHAR_ENCODING_8859_1:
1565 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1566 if (handler != NULL) return(handler);
1568 case XML_CHAR_ENCODING_8859_2:
1569 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1570 if (handler != NULL) return(handler);
1572 case XML_CHAR_ENCODING_8859_3:
1573 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1574 if (handler != NULL) return(handler);
1576 case XML_CHAR_ENCODING_8859_4:
1577 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1578 if (handler != NULL) return(handler);
1580 case XML_CHAR_ENCODING_8859_5:
1581 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1582 if (handler != NULL) return(handler);
1584 case XML_CHAR_ENCODING_8859_6:
1585 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1586 if (handler != NULL) return(handler);
1588 case XML_CHAR_ENCODING_8859_7:
1589 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1590 if (handler != NULL) return(handler);
1592 case XML_CHAR_ENCODING_8859_8:
1593 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1594 if (handler != NULL) return(handler);
1596 case XML_CHAR_ENCODING_8859_9:
1597 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1598 if (handler != NULL) return(handler);
1602 case XML_CHAR_ENCODING_2022_JP:
1603 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1604 if (handler != NULL) return(handler);
1606 case XML_CHAR_ENCODING_SHIFT_JIS:
1607 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1608 if (handler != NULL) return(handler);
1609 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1610 if (handler != NULL) return(handler);
1611 handler = xmlFindCharEncodingHandler("Shift_JIS");
1612 if (handler != NULL) return(handler);
1614 case XML_CHAR_ENCODING_EUC_JP:
1615 handler = xmlFindCharEncodingHandler("EUC-JP");
1616 if (handler != NULL) return(handler);
1622 #ifdef DEBUG_ENCODING
1623 xmlGenericError(xmlGenericErrorContext,
1624 "No handler found for encoding %d\n", enc);
1630 * xmlFindCharEncodingHandler:
1631 * @name: a string describing the char encoding.
1633 * Search in the registered set the handler able to read/write that encoding.
1635 * Returns the handler or NULL if not found
1637 xmlCharEncodingHandlerPtr
1638 xmlFindCharEncodingHandler(const char *name) {
1641 xmlCharEncoding alias;
1642 #ifdef LIBXML_ICONV_ENABLED
1643 xmlCharEncodingHandlerPtr enc;
1644 iconv_t icv_in, icv_out;
1645 #endif /* LIBXML_ICONV_ENABLED */
1646 #ifdef LIBXML_ICU_ENABLED
1647 xmlCharEncodingHandlerPtr encu;
1648 uconv_t *ucv_in, *ucv_out;
1649 #endif /* LIBXML_ICU_ENABLED */
1653 if (handlers == NULL) xmlInitCharEncodingHandlers();
1654 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1655 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1658 * Do the alias resolution
1661 nalias = xmlGetEncodingAlias(name);
1666 * Check first for directly registered encoding names
1668 for (i = 0;i < 99;i++) {
1669 upper[i] = toupper(name[i]);
1670 if (upper[i] == 0) break;
1674 if (handlers != NULL) {
1675 for (i = 0;i < nbCharEncodingHandler; i++) {
1676 if (!strcmp(upper, handlers[i]->name)) {
1677 #ifdef DEBUG_ENCODING
1678 xmlGenericError(xmlGenericErrorContext,
1679 "Found registered handler for encoding %s\n", name);
1681 return(handlers[i]);
1686 #ifdef LIBXML_ICONV_ENABLED
1687 /* check whether iconv can handle this */
1688 icv_in = iconv_open("UTF-8", name);
1689 icv_out = iconv_open(name, "UTF-8");
1690 if (icv_in == (iconv_t) -1) {
1691 icv_in = iconv_open("UTF-8", upper);
1693 if (icv_out == (iconv_t) -1) {
1694 icv_out = iconv_open(upper, "UTF-8");
1696 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1697 enc = (xmlCharEncodingHandlerPtr)
1698 xmlMalloc(sizeof(xmlCharEncodingHandler));
1700 iconv_close(icv_in);
1701 iconv_close(icv_out);
1704 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1705 enc->name = xmlMemStrdup(name);
1708 enc->iconv_in = icv_in;
1709 enc->iconv_out = icv_out;
1710 #ifdef DEBUG_ENCODING
1711 xmlGenericError(xmlGenericErrorContext,
1712 "Found iconv handler for encoding %s\n", name);
1715 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1716 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1717 "iconv : problems with filters for '%s'\n", name);
1719 #endif /* LIBXML_ICONV_ENABLED */
1720 #ifdef LIBXML_ICU_ENABLED
1721 /* check whether icu can handle this */
1722 ucv_in = openIcuConverter(name, 1);
1723 ucv_out = openIcuConverter(name, 0);
1724 if (ucv_in != NULL && ucv_out != NULL) {
1725 encu = (xmlCharEncodingHandlerPtr)
1726 xmlMalloc(sizeof(xmlCharEncodingHandler));
1728 closeIcuConverter(ucv_in);
1729 closeIcuConverter(ucv_out);
1732 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1733 encu->name = xmlMemStrdup(name);
1735 encu->output = NULL;
1736 encu->uconv_in = ucv_in;
1737 encu->uconv_out = ucv_out;
1738 #ifdef DEBUG_ENCODING
1739 xmlGenericError(xmlGenericErrorContext,
1740 "Found ICU converter handler for encoding %s\n", name);
1743 } else if (ucv_in != NULL || ucv_out != NULL) {
1744 closeIcuConverter(ucv_in);
1745 closeIcuConverter(ucv_out);
1746 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1747 "ICU converter : problems with filters for '%s'\n", name);
1749 #endif /* LIBXML_ICU_ENABLED */
1751 #ifdef DEBUG_ENCODING
1752 xmlGenericError(xmlGenericErrorContext,
1753 "No handler found for encoding %s\n", name);
1757 * Fallback using the canonical names
1759 alias = xmlParseCharEncoding(norig);
1760 if (alias != XML_CHAR_ENCODING_ERROR) {
1762 canon = xmlGetCharEncodingName(alias);
1763 if ((canon != NULL) && (strcmp(name, canon))) {
1764 return(xmlFindCharEncodingHandler(canon));
1768 /* If "none of the above", give up */
1772 /************************************************************************
1774 * ICONV based generic conversion functions *
1776 ************************************************************************/
1778 #ifdef LIBXML_ICONV_ENABLED
1781 * @cd: iconv converter data structure
1782 * @out: a pointer to an array of bytes to store the result
1783 * @outlen: the length of @out
1784 * @in: a pointer to an array of input bytes to convert
1785 * @inlen: the length of @in
1787 * Returns 0 if success, or
1788 * -1 by lack of space, or
1789 * -2 if the transcoding fails (for *in is not valid utf8 string or
1790 * the result of transformation can't fit into the encoding we want), or
1791 * -3 if the last byte can't form a single output char.
1793 * The value of @inlen after return is the number of octets consumed
1794 * as the return value is positive, else unpredictable.
1795 * The value of @outlen after return is the number of octets produced.
1798 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1799 const unsigned char *in, int *inlen) {
1800 size_t icv_inlen, icv_outlen;
1801 const char *icv_in = (const char *) in;
1802 char *icv_out = (char *) out;
1805 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1806 if (outlen != NULL) *outlen = 0;
1810 icv_outlen = *outlen;
1811 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1812 *inlen -= icv_inlen;
1813 *outlen -= icv_outlen;
1814 if ((icv_inlen != 0) || (ret == -1)) {
1816 if (errno == EILSEQ) {
1821 if (errno == E2BIG) {
1826 if (errno == EINVAL) {
1836 #endif /* LIBXML_ICONV_ENABLED */
1838 /************************************************************************
1840 * ICU based generic conversion functions *
1842 ************************************************************************/
1844 #ifdef LIBXML_ICU_ENABLED
1847 * @cd: ICU uconverter data structure
1848 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1849 * @out: a pointer to an array of bytes to store the result
1850 * @outlen: the length of @out
1851 * @in: a pointer to an array of input bytes to convert
1852 * @inlen: the length of @in
1854 * Returns 0 if success, or
1855 * -1 by lack of space, or
1856 * -2 if the transcoding fails (for *in is not valid utf8 string or
1857 * the result of transformation can't fit into the encoding we want), or
1858 * -3 if the last byte can't form a single output char.
1860 * The value of @inlen after return is the number of octets consumed
1861 * as the return value is positive, else unpredictable.
1862 * The value of @outlen after return is the number of octets produced.
1865 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1866 const unsigned char *in, int *inlen) {
1867 const char *ucv_in = (const char *) in;
1868 char *ucv_out = (char *) out;
1869 UErrorCode err = U_ZERO_ERROR;
1871 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1872 if (outlen != NULL) *outlen = 0;
1878 * 1. is ucnv_convert(To|From)Algorithmic better?
1879 * 2. had we better use an explicit pivot buffer?
1880 * 3. error returned comes from 'fromUnicode' only even
1881 * when toUnicode is true !
1884 /* encoding => UTF-16 => UTF-8 */
1885 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1886 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1889 /* UTF-8 => UTF-16 => encoding */
1890 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1891 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1894 *inlen = ucv_in - (const char*) in;
1895 *outlen = ucv_out - (char *) out;
1898 if (err == U_BUFFER_OVERFLOW_ERROR)
1900 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1902 /* if (err == U_TRUNCATED_CHAR_FOUND) */
1905 #endif /* LIBXML_ICU_ENABLED */
1907 /************************************************************************
1909 * The real API used by libxml for on-the-fly conversion *
1911 ************************************************************************/
1914 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1915 int *outlen, const unsigned char *in, int *inlen) {
1918 if (handler->input != NULL) {
1919 ret = handler->input(out, outlen, in, inlen);
1921 #ifdef LIBXML_ICONV_ENABLED
1922 else if (handler->iconv_in != NULL) {
1923 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1925 #endif /* LIBXML_ICONV_ENABLED */
1926 #ifdef LIBXML_ICU_ENABLED
1927 else if (handler->uconv_in != NULL) {
1928 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen);
1930 #endif /* LIBXML_ICU_ENABLED */
1940 /* Returns -4 if no output function was found. */
1942 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1943 int *outlen, const unsigned char *in, int *inlen) {
1946 if (handler->output != NULL) {
1947 ret = handler->output(out, outlen, in, inlen);
1949 #ifdef LIBXML_ICONV_ENABLED
1950 else if (handler->iconv_out != NULL) {
1951 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
1953 #endif /* LIBXML_ICONV_ENABLED */
1954 #ifdef LIBXML_ICU_ENABLED
1955 else if (handler->uconv_out != NULL) {
1956 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen);
1958 #endif /* LIBXML_ICU_ENABLED */
1969 * xmlCharEncFirstLineInt:
1970 * @handler: char encoding transformation data structure
1971 * @out: an xmlBuffer for the output.
1972 * @in: an xmlBuffer for the input
1973 * @len: number of bytes to convert for the first line, or -1
1975 * Front-end for the encoding handler input function, but handle only
1976 * the very first line, i.e. limit itself to 45 chars.
1978 * Returns the number of bytes written on success, or
1980 * -2 if the transcoding fails (for *in is not valid utf8 string or
1981 * the result of transformation can't fit into the encoding we want), or
1984 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1985 xmlBufferPtr in, int len) {
1990 if (handler == NULL) return(-1);
1991 if (out == NULL) return(-1);
1992 if (in == NULL) return(-1);
1994 /* calculate space available */
1995 written = out->size - out->use - 1; /* count '\0' */
1998 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1999 * 45 chars should be sufficient to reach the end of the encoding
2000 * declaration without going too far inside the document content.
2001 * on UTF-16 this means 90 bytes, on UCS4 this means 180
2002 * The actual value depending on guessed encoding is passed as @len
2012 if (toconv * 2 >= written) {
2013 xmlBufferGrow(out, toconv * 2);
2014 written = out->size - out->use - 1;
2017 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2018 in->content, &toconv);
2019 xmlBufferShrink(in, toconv);
2020 out->use += written;
2021 out->content[out->use] = 0;
2022 if (ret == -1) ret = -3;
2024 #ifdef DEBUG_ENCODING
2027 xmlGenericError(xmlGenericErrorContext,
2028 "converted %d bytes to %d bytes of input\n",
2032 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2033 toconv, written, in->use);
2036 xmlGenericError(xmlGenericErrorContext,
2037 "input conversion failed due to input error\n");
2040 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2041 toconv, written, in->use);
2044 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2046 #endif /* DEBUG_ENCODING */
2048 * Ignore when input buffer is not on a boundary
2050 if (ret == -3) ret = 0;
2051 if (ret == -1) ret = 0;
2056 * xmlCharEncFirstLine:
2057 * @handler: char encoding transformation data structure
2058 * @out: an xmlBuffer for the output.
2059 * @in: an xmlBuffer for the input
2061 * Front-end for the encoding handler input function, but handle only
2062 * the very first line, i.e. limit itself to 45 chars.
2064 * Returns the number of bytes written on success, or
2066 * -2 if the transcoding fails (for *in is not valid utf8 string or
2067 * the result of transformation can't fit into the encoding we want), or
2070 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2072 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2076 * xmlCharEncFirstLineInput:
2077 * @input: a parser input buffer
2078 * @len: number of bytes to convert for the first line, or -1
2080 * Front-end for the encoding handler input function, but handle only
2081 * the very first line. Point is that this is based on autodetection
2082 * of the encoding and once that first line is converted we may find
2083 * out that a different decoder is needed to process the input.
2085 * Returns the number of bytes written on success, or
2087 * -2 if the transcoding fails (for *in is not valid utf8 string or
2088 * the result of transformation can't fit into the encoding we want), or
2091 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2101 if ((input == NULL) || (input->encoder == NULL) ||
2102 (input->buffer == NULL) || (input->raw == NULL))
2104 out = input->buffer;
2107 toconv = xmlBufUse(in);
2110 written = xmlBufAvail(out) - 1; /* count '\0' */
2112 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2113 * 45 chars should be sufficient to reach the end of the encoding
2114 * declaration without going too far inside the document content.
2115 * on UTF-16 this means 90 bytes, on UCS4 this means 180
2116 * The actual value depending on guessed encoding is passed as @len
2120 if (toconv > (unsigned int) len)
2126 if (toconv * 2 >= written) {
2127 xmlBufGrow(out, toconv * 2);
2128 written = xmlBufAvail(out) - 1;
2135 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2136 xmlBufContent(in), &c_in);
2137 xmlBufShrink(in, c_in);
2138 xmlBufAddLen(out, c_out);
2144 #ifdef DEBUG_ENCODING
2145 xmlGenericError(xmlGenericErrorContext,
2146 "converted %d bytes to %d bytes of input\n",
2151 #ifdef DEBUG_ENCODING
2152 xmlGenericError(xmlGenericErrorContext,
2153 "converted %d bytes to %d bytes of input, %d left\n",
2154 c_in, c_out, (int)xmlBufUse(in));
2158 #ifdef DEBUG_ENCODING
2159 xmlGenericError(xmlGenericErrorContext,
2160 "converted %d bytes to %d bytes of input, %d left\n",
2161 c_in, c_out, (int)xmlBufUse(in));
2166 const xmlChar *content = xmlBufContent(in);
2168 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2169 content[0], content[1],
2170 content[2], content[3]);
2172 xmlEncodingErr(XML_I18N_CONV_FAILED,
2173 "input conversion failed due to input error, bytes %s\n",
2178 * Ignore when input buffer is not on a boundary
2180 if (ret == -3) ret = 0;
2181 if (ret == -1) ret = 0;
2187 * @input: a parser input buffer
2188 * @flush: try to flush all the raw buffer
2190 * Generic front-end for the encoding handler on parser input
2192 * Returns the number of bytes written on success, or
2194 * -2 if the transcoding fails (for *in is not valid utf8 string or
2195 * the result of transformation can't fit into the encoding we want), or
2198 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2208 if ((input == NULL) || (input->encoder == NULL) ||
2209 (input->buffer == NULL) || (input->raw == NULL))
2211 out = input->buffer;
2214 toconv = xmlBufUse(in);
2217 if ((toconv > 64 * 1024) && (flush == 0))
2219 written = xmlBufAvail(out);
2221 written--; /* count '\0' */
2222 if (toconv * 2 >= written) {
2223 xmlBufGrow(out, toconv * 2);
2224 written = xmlBufAvail(out);
2226 written--; /* count '\0' */
2228 if ((written > 128 * 1024) && (flush == 0))
2229 written = 128 * 1024;
2233 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2234 xmlBufContent(in), &c_in);
2235 xmlBufShrink(in, c_in);
2236 xmlBufAddLen(out, c_out);
2242 #ifdef DEBUG_ENCODING
2243 xmlGenericError(xmlGenericErrorContext,
2244 "converted %d bytes to %d bytes of input\n",
2249 #ifdef DEBUG_ENCODING
2250 xmlGenericError(xmlGenericErrorContext,
2251 "converted %d bytes to %d bytes of input, %d left\n",
2252 c_in, c_out, (int)xmlBufUse(in));
2256 #ifdef DEBUG_ENCODING
2257 xmlGenericError(xmlGenericErrorContext,
2258 "converted %d bytes to %d bytes of input, %d left\n",
2259 c_in, c_out, (int)xmlBufUse(in));
2264 const xmlChar *content = xmlBufContent(in);
2266 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2267 content[0], content[1],
2268 content[2], content[3]);
2270 xmlEncodingErr(XML_I18N_CONV_FAILED,
2271 "input conversion failed due to input error, bytes %s\n",
2276 * Ignore when input buffer is not on a boundary
2280 return (c_out? c_out : ret);
2285 * @handler: char encoding transformation data structure
2286 * @out: an xmlBuffer for the output.
2287 * @in: an xmlBuffer for the input
2289 * Generic front-end for the encoding handler input function
2291 * Returns the number of bytes written on success, or
2293 * -2 if the transcoding fails (for *in is not valid utf8 string or
2294 * the result of transformation can't fit into the encoding we want), or
2297 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2304 if (handler == NULL)
2314 written = out->size - out->use -1; /* count '\0' */
2315 if (toconv * 2 >= written) {
2316 xmlBufferGrow(out, out->size + toconv * 2);
2317 written = out->size - out->use - 1;
2319 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2320 in->content, &toconv);
2321 xmlBufferShrink(in, toconv);
2322 out->use += written;
2323 out->content[out->use] = 0;
2329 #ifdef DEBUG_ENCODING
2330 xmlGenericError(xmlGenericErrorContext,
2331 "converted %d bytes to %d bytes of input\n",
2336 #ifdef DEBUG_ENCODING
2337 xmlGenericError(xmlGenericErrorContext,
2338 "converted %d bytes to %d bytes of input, %d left\n",
2339 toconv, written, in->use);
2343 #ifdef DEBUG_ENCODING
2344 xmlGenericError(xmlGenericErrorContext,
2345 "converted %d bytes to %d bytes of input, %d left\n",
2346 toconv, written, in->use);
2352 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2353 in->content[0], in->content[1],
2354 in->content[2], in->content[3]);
2356 xmlEncodingErr(XML_I18N_CONV_FAILED,
2357 "input conversion failed due to input error, bytes %s\n",
2362 * Ignore when input buffer is not on a boundary
2366 return (written? written : ret);
2369 #ifdef LIBXML_OUTPUT_ENABLED
2372 * @output: a parser output buffer
2373 * @init: is this an initialization call without data
2375 * Generic front-end for the encoding handler on parser output
2376 * a first call with @init == 1 has to be made first to initiate the
2377 * output in case of non-stateless encoding needing to initiate their
2378 * state or the output (like the BOM in UTF16).
2379 * In case of UTF8 sequence conversion errors for the given encoder,
2380 * the content will be automatically remapped to a CharRef sequence.
2382 * Returns the number of bytes written on success, or
2384 * -2 if the transcoding fails (for *in is not valid utf8 string or
2385 * the result of transformation can't fit into the encoding we want), or
2388 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2392 size_t writtentot = 0;
2399 if ((output == NULL) || (output->encoder == NULL) ||
2400 (output->buffer == NULL) || (output->conv == NULL))
2403 in = output->buffer;
2407 written = xmlBufAvail(out);
2409 written--; /* count '\0' */
2412 * First specific handling of the initialization call
2417 /* TODO: Check return value. */
2418 xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2420 xmlBufAddLen(out, c_out);
2421 #ifdef DEBUG_ENCODING
2422 xmlGenericError(xmlGenericErrorContext,
2423 "initialized encoder\n");
2429 * Conversion itself.
2431 toconv = xmlBufUse(in);
2434 if (toconv > 64 * 1024)
2436 if (toconv * 4 >= written) {
2437 xmlBufGrow(out, toconv * 4);
2438 written = xmlBufAvail(out) - 1;
2440 if (written > 256 * 1024)
2441 written = 256 * 1024;
2445 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2446 xmlBufContent(in), &c_in);
2447 xmlBufShrink(in, c_in);
2448 xmlBufAddLen(out, c_out);
2449 writtentot += c_out;
2452 /* Can be a limitation of iconv or uconv */
2458 if (ret >= 0) output += ret;
2461 * Attempt to handle error cases
2465 #ifdef DEBUG_ENCODING
2466 xmlGenericError(xmlGenericErrorContext,
2467 "converted %d bytes to %d bytes of output\n",
2472 #ifdef DEBUG_ENCODING
2473 xmlGenericError(xmlGenericErrorContext,
2474 "output conversion failed by lack of space\n");
2478 #ifdef DEBUG_ENCODING
2479 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2480 c_in, c_out, (int) xmlBufUse(in));
2484 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2485 "xmlCharEncOutFunc: no output function !\n", NULL);
2489 xmlChar charref[20];
2490 int len = (int) xmlBufUse(in);
2491 xmlChar *content = xmlBufContent(in);
2492 int cur, charrefLen;
2494 cur = xmlGetUTF8Char(content, &len);
2498 #ifdef DEBUG_ENCODING
2499 xmlGenericError(xmlGenericErrorContext,
2500 "handling output conversion error\n");
2501 xmlGenericError(xmlGenericErrorContext,
2502 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2503 content[0], content[1],
2504 content[2], content[3]);
2507 * Removes the UTF8 sequence, and replace it by a charref
2508 * and continue the transcoding phase, hoping the error
2509 * did not mangle the encoder state.
2511 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2513 xmlBufShrink(in, len);
2514 xmlBufGrow(out, charrefLen * 4);
2515 c_out = xmlBufAvail(out) - 1;
2517 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2520 if ((ret < 0) || (c_in != charrefLen)) {
2523 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2524 content[0], content[1],
2525 content[2], content[3]);
2527 xmlEncodingErr(XML_I18N_CONV_FAILED,
2528 "output conversion failed due to conv error, bytes %s\n",
2530 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2535 xmlBufAddLen(out, c_out);
2536 writtentot += c_out;
2545 * xmlCharEncOutFunc:
2546 * @handler: char encoding transformation data structure
2547 * @out: an xmlBuffer for the output.
2548 * @in: an xmlBuffer for the input
2550 * Generic front-end for the encoding handler output function
2551 * a first call with @in == NULL has to be made first to initiate the
2552 * output in case of non-stateless encoding needing to initiate their
2553 * state or the output (like the BOM in UTF16).
2554 * In case of UTF8 sequence conversion errors for the given encoder,
2555 * the content will be automatically remapped to a CharRef sequence.
2557 * Returns the number of bytes written on success, or
2559 * -2 if the transcoding fails (for *in is not valid utf8 string or
2560 * the result of transformation can't fit into the encoding we want), or
2563 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2571 if (handler == NULL) return(-1);
2572 if (out == NULL) return(-1);
2576 written = out->size - out->use;
2579 written--; /* Gennady: count '/0' */
2582 * First specific handling of in = NULL, i.e. the initialization call
2586 /* TODO: Check return value. */
2587 xmlEncOutputChunk(handler, &out->content[out->use], &written,
2589 out->use += written;
2590 out->content[out->use] = 0;
2591 #ifdef DEBUG_ENCODING
2592 xmlGenericError(xmlGenericErrorContext,
2593 "initialized encoder\n");
2599 * Conversion itself.
2604 if (toconv * 4 >= written) {
2605 xmlBufferGrow(out, toconv * 4);
2606 written = out->size - out->use - 1;
2608 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2609 in->content, &toconv);
2610 xmlBufferShrink(in, toconv);
2611 out->use += written;
2612 writtentot += written;
2613 out->content[out->use] = 0;
2616 /* Can be a limitation of iconv or uconv */
2622 if (ret >= 0) output += ret;
2625 * Attempt to handle error cases
2629 #ifdef DEBUG_ENCODING
2630 xmlGenericError(xmlGenericErrorContext,
2631 "converted %d bytes to %d bytes of output\n",
2636 #ifdef DEBUG_ENCODING
2637 xmlGenericError(xmlGenericErrorContext,
2638 "output conversion failed by lack of space\n");
2642 #ifdef DEBUG_ENCODING
2643 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2644 toconv, written, in->use);
2648 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2649 "xmlCharEncOutFunc: no output function !\n", NULL);
2653 xmlChar charref[20];
2655 const xmlChar *utf = (const xmlChar *) in->content;
2656 int cur, charrefLen;
2658 cur = xmlGetUTF8Char(utf, &len);
2662 #ifdef DEBUG_ENCODING
2663 xmlGenericError(xmlGenericErrorContext,
2664 "handling output conversion error\n");
2665 xmlGenericError(xmlGenericErrorContext,
2666 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2667 in->content[0], in->content[1],
2668 in->content[2], in->content[3]);
2671 * Removes the UTF8 sequence, and replace it by a charref
2672 * and continue the transcoding phase, hoping the error
2673 * did not mangle the encoder state.
2675 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2677 xmlBufferShrink(in, len);
2678 xmlBufferGrow(out, charrefLen * 4);
2679 written = out->size - out->use - 1;
2680 toconv = charrefLen;
2681 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2684 if ((ret < 0) || (toconv != charrefLen)) {
2687 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2688 in->content[0], in->content[1],
2689 in->content[2], in->content[3]);
2691 xmlEncodingErr(XML_I18N_CONV_FAILED,
2692 "output conversion failed due to conv error, bytes %s\n",
2694 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2695 in->content[0] = ' ';
2699 out->use += written;
2700 writtentot += written;
2701 out->content[out->use] = 0;
2709 * xmlCharEncCloseFunc:
2710 * @handler: char encoding transformation data structure
2712 * Generic front-end for encoding handler close function
2714 * Returns 0 if success, or -1 in case of error
/* Closes an encoding handler; gives up with -1 when @handler or its name is NULL. */
2717 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2720 int i, handler_in_list = 0;
2722 if (handler == NULL) return(-1);
2723 if (handler->name == NULL) return(-1);
/* Linear scan of the handlers table: record whether @handler is one of its entries. */
2724 if (handlers != NULL) {
2725 for (i = 0;i < nbCharEncodingHandler; i++) {
2726 if (handler == handlers[i]) {
2727 handler_in_list = 1;
/* iconv builds: descriptors are closed only for handlers that were NOT found in the table
 * (handler_in_list == 0) and that own at least one iconv descriptor. */
2732 #ifdef LIBXML_ICONV_ENABLED
2734 * Iconv handlers can be used only once, free the whole block.
2735 * and the associated icon resources.
2737 if ((handler_in_list == 0) &&
2738 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2740 if (handler->iconv_out != NULL) {
/* The result of iconv_close() is tested; the statement it guards is not visible in this excerpt. */
2741 if (iconv_close(handler->iconv_out))
/* The descriptor field is reset once it has been closed. */
2743 handler->iconv_out = NULL;
2745 if (handler->iconv_in != NULL) {
2746 if (iconv_close(handler->iconv_in))
2748 handler->iconv_in = NULL;
2751 #endif /* LIBXML_ICONV_ENABLED */
/* ICU builds: same selection rule as above, using closeIcuConverter() on each converter. */
2752 #ifdef LIBXML_ICU_ENABLED
2753 if ((handler_in_list == 0) &&
2754 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2756 if (handler->uconv_out != NULL) {
2757 closeIcuConverter(handler->uconv_out);
2758 handler->uconv_out = NULL;
2760 if (handler->uconv_in != NULL) {
2761 closeIcuConverter(handler->uconv_in);
2762 handler->uconv_in = NULL;
/* NOTE(review): the condition under which the name is released is not visible in this
 * excerpt; presumably it is restricted to the dynamic handlers closed above - confirm. */
2767 /* free up only dynamic handlers iconv/uconv */
2768 if (handler->name != NULL)
2769 xmlFree(handler->name);
2770 handler->name = NULL;
/* Debug builds trace either a failure or a success message through xmlGenericError. */
2773 #ifdef DEBUG_ENCODING
2775 xmlGenericError(xmlGenericErrorContext,
2776 "failed to close the encoding handler\n");
2778 xmlGenericError(xmlGenericErrorContext,
2779 "closed the encoding handler\n");
2787 * @ctxt: an XML parser context
2789 * This function provides the current index of the parser relative
2790 * to the start of the current entity. This function is computed in
2791 * bytes from the beginning starting at zero and finishing at the
2792 * size in bytes of the file if parsing a file. The function is
2793 * of constant cost if the input is UTF-8 but can be costly if run
2794 * on non-UTF-8 input.
2796 * Returns the index in bytes from the beginning of the entity or -1
2797 * in case the index could not be computed.
/* Reports the byte position of the parser in its current input; -1 when @ctxt or the
 * input pointer `in` is NULL. */
2800 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2801 xmlParserInputPtr in;
2803 if (ctxt == NULL) return(-1);
2805 if (in == NULL) return(-1);
/* An encoder attached to the input buffer means the data between in->cur and in->end has
 * been converted, so the position is derived from the raw consumed count instead. */
2806 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2807 unsigned int unused = 0;
2808 xmlCharEncodingHandler * handler = in->buf->encoder;
2810 * Encoding conversion, compute the number of unused original
2811 * bytes from the input not consumed and substract that from
2812 * the raw consumed value, this is not a cheap operation
2814 if (in->end - in->cur > 0) {
/* convbuf is a 32000-byte scratch area that only receives the re-encoded output;
 * `written` starts out as its capacity. */
2815 unsigned char convbuf[32000];
2816 const unsigned char *cur = (const unsigned char *)in->cur;
2817 int toconv = in->end - in->cur, written = 32000;
/* Each pass converts what remains between cur and in->end. How cur advances and how
 * `unused` is accumulated is not visible in this excerpt. */
2822 toconv = in->end - cur;
2824 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
/* The conversion is repeated for as long as the chunk call reports -2. */
2834 } while (ret == -2);
/* rawconsumed is compared with unused before the subtraction on the next visible line;
 * the guarded statement is not visible in this excerpt. */
2836 if (in->buf->rawconsumed < unused)
2838 return(in->buf->rawconsumed - unused);
/* No encoder: bytes consumed so far plus the offset of cur from the start of the buffer. */
2840 return(in->consumed + (in->cur - in->base));
2843 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2844 #ifdef LIBXML_ISO8859X_ENABLED
2848 * @out: a pointer to an array of bytes to store the result
2849 * @outlen: the length of @out
2850 * @in: a pointer to an array of UTF-8 chars
2851 * @inlen: the length of @in
2852 * @xlattable: the 2-level transcoding table
2854 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2855 * block of chars out.
2857 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2858 * The value of @inlen after return is the number of octets consumed
2859 * if the return value is positive, else unpredictable.
2860 * The value of @outlen after return is the number of octets produced.
/* Converts UTF-8 input to a single-byte encoding through the multi-level table @xlattable.
 * Every exit path visible here first stores the number of output bytes written so far in
 * *outlen (out - outstart) and the input offset `processed - instart` in *inlen. */
2863 UTF8ToISO8859x(unsigned char* out, int *outlen,
2864 const unsigned char* in, int *inlen,
2865 unsigned char const *xlattable) {
2866 const unsigned char* outstart = out;
2867 const unsigned char* inend;
2868 const unsigned char* instart = in;
2869 const unsigned char* processed = in;
/* out, outlen, inlen and xlattable are rejected when NULL; @in is not part of this test. */
2871 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2872 (xlattable == NULL))
2876 * initialization nothing to do
/* Walk the input one lead byte at a time; d holds the byte just read. */
2882 inend = in + (*inlen);
2883 while (in < inend) {
2884 unsigned char d = *in++;
/* Values below 0xC0 that reach this branch cannot start a UTF-8 sequence. */
2887 } else if (d < 0xC0) {
2888 /* trailing byte in leading position */
2889 *outlen = out - outstart;
2890 *inlen = processed - instart;
/* Lead byte below 0xE0: a two-byte sequence, one continuation byte is required. */
2892 } else if (d < 0xE0) {
2894 if (!(in < inend)) {
2895 /* trailing byte not in input buffer */
2896 *outlen = out - outstart;
2897 *inlen = processed - instart;
/* A continuation byte must match the bit pattern 10xxxxxx. */
2901 if ((c & 0xC0) != 0x80) {
2902 /* not a trailing byte */
2903 *outlen = out - outstart;
2904 *inlen = processed - instart;
/* Two-level lookup: xlattable[d] selects a 64-byte block stored after the first 48 bytes
 * of the table, and c selects the byte inside that block. NOTE(review): d and c are
 * presumably reduced to their payload bits before this line; the masking statements are
 * not visible in this excerpt - confirm. */
2909 d = xlattable [48 + c + xlattable [d] * 64];
2911 /* not in character set */
2912 *outlen = out - outstart;
2913 *inlen = processed - instart;
/* Lead byte below 0xF0: a three-byte sequence, two continuation bytes are required. */
2917 } else if (d < 0xF0) {
2920 if (!(in < inend - 1)) {
2921 /* trailing bytes not in input buffer */
2922 *outlen = out - outstart;
2923 *inlen = processed - instart;
2927 if ((c1 & 0xC0) != 0x80) {
2928 /* not a trailing byte (c1) */
2929 *outlen = out - outstart;
2930 *inlen = processed - instart;
2934 if ((c2 & 0xC0) != 0x80) {
2935 /* not a trailing byte (c2) */
2936 *outlen = out - outstart;
2937 *inlen = processed - instart;
/* Three-level lookup: the entry at 32 + d selects the block indexed by c1, whose entry
 * selects the block indexed by c2. */
2943 d = xlattable [48 + c2 + xlattable [48 + c1 +
2944 xlattable [32 + d] * 64] * 64];
2946 /* not in character set */
2947 *outlen = out - outstart;
2948 *inlen = processed - instart;
2953 /* cannot transcode >= U+010000 */
2954 *outlen = out - outstart;
2955 *inlen = processed - instart;
/* Normal exit after the loop: report the totals for output written and input processed. */
2960 *outlen = out - outstart;
2961 *inlen = processed - instart;
2967 * @out: a pointer to an array of bytes to store the result
2968 * @outlen: the length of @out
2969 * @in: a pointer to an array of ISO 8859-* chars
2970 * @inlen: the length of @in
2972 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2973 * block of chars out.
2974 * Returns 0 if success, or -1 otherwise
2975 * The value of @inlen after return is the number of octets consumed
2976 * The value of @outlen after return is the number of octets produced.
/* Converts single-byte input to UTF-8, mapping bytes through @unicodetable (indexed by
 * byte value minus 0x80). On the exits visible here *outlen receives out - outstart and
 * *inlen receives in - instart. */
2979 ISO8859xToUTF8(unsigned char* out, int *outlen,
2980 const unsigned char* in, int *inlen,
2981 unsigned short const *unicodetable) {
2982 unsigned char* outstart = out;
2983 unsigned char* outend;
2984 const unsigned char* instart = in;
2985 const unsigned char* inend;
2986 const unsigned char* instop;
/* All five pointer arguments are rejected when NULL. */
2989 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2990 (in == NULL) || (unicodetable == NULL))
2992 outend = out + *outlen;
2993 inend = in + *inlen;
/* The main loop runs only while out < outend - 2, i.e. while the longest sequence
 * emitted below (three bytes) still fits in the output. */
2996 while ((in < inend) && (out < outend - 2)) {
/* Look up the code point for the current input byte. */
2998 c = unicodetable [*in - 0x80];
3000 /* undefined code point */
3001 *outlen = out - outstart;
3002 *inlen = in - instart;
/* Two-byte UTF-8 form: 110xxxxx 10xxxxxx. */
3006 *out++ = ((c >> 6) & 0x1F) | 0xC0;
3007 *out++ = (c & 0x3F) | 0x80;
/* Three-byte UTF-8 form: 1110xxxx 10xxxxxx 10xxxxxx. */
3009 *out++ = ((c >> 12) & 0x0F) | 0xE0;
3010 *out++ = ((c >> 6) & 0x3F) | 0x80;
3011 *out++ = (c & 0x3F) | 0x80;
/* Clamp instop so that the loop below cannot consume more input than the output space
 * that is left. */
3015 if (instop - in > outend - out) instop = in + (outend - out);
/* NOTE(review): *in is read BEFORE the in < instop bound is tested, so when in has
 * reached instop one byte past the permitted range is dereferenced. Swapping the two
 * operands of && would avoid that read - confirm and fix with the full function in view. */
3016 while ((*in < 0x80) && (in < instop)) {
/* After the main loop, up to two more bytes below 0x80 are considered, each guarded by
 * in < inend and out < outend; the guarded statements are not visible in this excerpt. */
3020 if ((in < inend) && (out < outend) && (*in < 0x80)) {
3023 if ((in < inend) && (out < outend) && (*in < 0x80)) {
/* Final totals: output bytes written and input bytes consumed. */
3026 *outlen = out - outstart;
3027 *inlen = in - instart;
3032 /************************************************************************
3033 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3034 ************************************************************************/
3036 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3037 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3038 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3039 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3040 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3041 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3042 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3043 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3044 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3045 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3046 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3047 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3048 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3049 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3050 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3051 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3052 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3055 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3056 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3057 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3064 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3065 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3066 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3067 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3068 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3069 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3070 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3071 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3072 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3073 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3076 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3077 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3078 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3079 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3080 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3081 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3082 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3085 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3086 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3087 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3088 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3089 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3090 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3091 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3092 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3093 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3094 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3095 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3096 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3097 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3098 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3099 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3100 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3101 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3104 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3105 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3106 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3107 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3108 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3109 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3112 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3113 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3114 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3115 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3116 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3117 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3118 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3119 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3122 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3125 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3126 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3128 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3130 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3131 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3132 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3133 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3134 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3135 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3138 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3139 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3140 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3141 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3142 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3143 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3144 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3145 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3146 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3147 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3148 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3149 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3150 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3151 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3152 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3153 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3154 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3157 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3158 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3159 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3160 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3161 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3162 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3166 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3167 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3168 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3169 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3170 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3171 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3172 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3173 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3174 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3175 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3176 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3177 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3178 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3179 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3180 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3181 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3182 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3183 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3184 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3187 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3188 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3189 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3190 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3191 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3192 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3193 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3194 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3195 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3196 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3197 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3198 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3199 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3200 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3201 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3202 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3203 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3206 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3207 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3215 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3216 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3217 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3218 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3219 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3220 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3221 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3222 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3223 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3224 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3236 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3237 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3238 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3239 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3240 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3241 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3242 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3243 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3244 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3245 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3246 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3247 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3248 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3249 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3250 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3251 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3252 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3255 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3256 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3258 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3264 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3265 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3266 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3267 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3268 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3272 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3273 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3274 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3275 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3276 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3277 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3278 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3281 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3282 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3283 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3284 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3285 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3286 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3287 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3288 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3289 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3290 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3291 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3292 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3293 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3294 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3295 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3296 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3297 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3300 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3301 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3302 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3303 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3305 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3309 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3310 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3311 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3312 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3318 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3325 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3326 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3327 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3328 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3329 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3335 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3336 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3337 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3338 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3339 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3340 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3341 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3342 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3343 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3344 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3345 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3346 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3347 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3348 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3349 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3350 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3353 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3354 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3356 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3358 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3362 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3363 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3364 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3365 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3371 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3373 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3378 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3379 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3383 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3388 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3389 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3390 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3391 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3392 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3393 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3394 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3395 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3396 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3397 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3398 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3399 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3400 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3401 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3402 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3403 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_9.
 * The declared size is 48 + 5 * 64 = 368 bytes and the 23 rows of 16
 * escapes below supply exactly that many, so the array holds no
 * terminating NUL.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * five 64-byte pages of ISO-8859-9 byte values, with 0x00 meaning "no
 * mapping"; the real interpretation lives in UTF8ToISO8859x (outside this
 * excerpt) -- confirm there.
 */
3406 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3407 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3409 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3410 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3415 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3416 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3417 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3418 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3419 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3420 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3421 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3422 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3424 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3428 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_10ToUTF8.
 * The first 32 entries run 0x0080..0x009f in order, so entry i presumably
 * holds the Unicode code point for ISO-8859-10 byte 0x80 + i -- confirm
 * against ISO8859xToUTF8, which is defined outside this excerpt.
 */
3432 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3433 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3434 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3435 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3436 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3437 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3438 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3439 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3440 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3441 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3442 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3443 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3444 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3445 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3446 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3447 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3448 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_10.
 * The declared size is 48 + 7 * 64 = 496 bytes and the 31 rows of 16
 * escapes below supply exactly that many, so the array holds no
 * terminating NUL.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte pages of ISO-8859-10 byte values, with 0x00 meaning "no
 * mapping"; the real interpretation lives in UTF8ToISO8859x (outside this
 * excerpt) -- confirm there.
 */
3451 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3452 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3460 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3461 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3462 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3463 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3464 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3465 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3466 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3467 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3468 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3470 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3471 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3480 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3481 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3482 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_11ToUTF8.
 * The first 32 entries run 0x0080..0x009f in order, so entry i presumably
 * holds the Unicode code point for ISO-8859-11 byte 0x80 + i -- confirm
 * against ISO8859xToUTF8, which is defined outside this excerpt.
 * NOTE(review): entries 91..94 and 124..127 are 0x0000; these presumably
 * mark byte values with no assigned character -- verify how
 * ISO8859xToUTF8 treats a zero entry.
 */
3485 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3486 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3487 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3488 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3489 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3490 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3491 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3492 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3493 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3494 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3495 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3496 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3497 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3498 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3499 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3500 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3501 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_11.
 * The declared size is 48 + 6 * 64 = 432 bytes and the 27 rows of 16
 * escapes below supply exactly that many, so the array holds no
 * terminating NUL.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte pages of ISO-8859-11 byte values, with 0x00 meaning "no
 * mapping"; the real interpretation lives in UTF8ToISO8859x (outside this
 * excerpt) -- confirm there.
 */
3504 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3505 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3506 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3507 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3508 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3513 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3514 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3520 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3521 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3522 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3523 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3524 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3525 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3526 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3529 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_13ToUTF8.
 * The first 32 entries run 0x0080..0x009f in order, so entry i presumably
 * holds the Unicode code point for ISO-8859-13 byte 0x80 + i -- confirm
 * against ISO8859xToUTF8, which is defined outside this excerpt.
 */
3534 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3535 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3536 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3537 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3538 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3539 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3540 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3541 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3542 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3543 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3544 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3545 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3546 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3547 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3548 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3549 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3550 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_13.
 * The declared size is 48 + 7 * 64 = 496 bytes and the 31 rows of 16
 * escapes below supply exactly that many, so the array holds no
 * terminating NUL.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte pages of ISO-8859-13 byte values, with 0x00 meaning "no
 * mapping"; the real interpretation lives in UTF8ToISO8859x (outside this
 * excerpt) -- confirm there.
 */
3553 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3554 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3562 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3563 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3564 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3565 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3574 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3575 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3576 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3577 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3578 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3579 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3580 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3581 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3582 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3583 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3584 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_14ToUTF8.
 * The first 32 entries run 0x0080..0x009f in order, so entry i presumably
 * holds the Unicode code point for ISO-8859-14 byte 0x80 + i -- confirm
 * against ISO8859xToUTF8, which is defined outside this excerpt.
 */
3587 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3588 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3589 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3590 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3591 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3592 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3593 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3594 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3595 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3596 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3597 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3598 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3599 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3600 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3601 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3602 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3603 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_14.
 * The declared size is 48 + 10 * 64 = 688 bytes and the 43 rows of 16
 * escapes below supply exactly that many, so the array holds no
 * terminating NUL.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * ten 64-byte pages of ISO-8859-14 byte values, with 0x00 meaning "no
 * mapping"; the real interpretation lives in UTF8ToISO8859x (outside this
 * excerpt) -- confirm there.
 */
3606 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3607 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3608 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3609 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3610 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3615 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3616 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3617 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3618 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3622 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3623 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3624 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3632 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3634 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3635 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3636 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3637 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3638 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3640 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3641 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3642 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3644 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3645 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3647 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3648 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3649 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_15ToUTF8.
 * The first entry is 0x0080 and most entries equal 0x80 + their index, so
 * entry i presumably holds the Unicode code point for ISO-8859-15 byte
 * 0x80 + i -- confirm against ISO8859xToUTF8, which is defined outside
 * this excerpt.
 */
3652 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3653 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3654 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3655 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3656 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3657 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3658 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3659 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3660 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3661 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3662 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3663 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3664 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3665 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3666 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3667 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3668 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_15.
 * The declared size is 48 + 6 * 64 = 432 bytes and the 27 rows of 16
 * escapes below supply exactly that many, so the array holds no
 * terminating NUL.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte pages of ISO-8859-15 byte values, with 0x00 meaning "no
 * mapping"; the real interpretation lives in UTF8ToISO8859x (outside this
 * excerpt) -- confirm there.
 */
3671 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3672 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3680 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3681 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3682 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3683 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3684 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3695 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3696 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3697 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3698 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_16ToUTF8.
 * The first 32 entries run 0x0080..0x009f in order, so entry i presumably
 * holds the Unicode code point for ISO-8859-16 byte 0x80 + i -- confirm
 * against ISO8859xToUTF8, which is defined outside this excerpt.
 */
3701 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3702 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3703 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3704 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3705 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3706 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3707 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3708 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3709 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3710 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3711 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3712 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3713 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3714 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3715 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3716 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3717 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Lookup data handed to UTF8ToISO8859x by UTF8ToISO8859_16.
 * The declared size is 48 + 9 * 64 = 624 bytes and the 39 rows of 16
 * escapes below supply exactly that many, so the array holds no
 * terminating NUL.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * nine 64-byte pages of ISO-8859-16 byte values, with 0x00 meaning "no
 * mapping"; the real interpretation lives in UTF8ToISO8859x (outside this
 * excerpt) -- confirm there.
 */
3720 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3721 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3722 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3723 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3724 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3726 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3729 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3730 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3731 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3732 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3733 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3734 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3738 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3740 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3741 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3744 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3746 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3747 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3751 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3753 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3754 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3756 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3757 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3758 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3759 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3764 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
/*
 * ISO8859_2ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_2 and
 * returns its result as-is. Argument and return-value semantics are those
 * of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3767 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3768 const unsigned char* in, int *inlen) {
3769 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
/*
 * UTF8ToISO8859_2:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_2
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3771 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3772 const unsigned char* in, int *inlen) {
3773 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
/*
 * ISO8859_3ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_3 and
 * returns its result as-is. Argument and return-value semantics are those
 * of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3776 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3777 const unsigned char* in, int *inlen) {
3778 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
/*
 * UTF8ToISO8859_3:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_3
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3780 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3781 const unsigned char* in, int *inlen) {
3782 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
/*
 * ISO8859_4ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_4 and
 * returns its result as-is. Argument and return-value semantics are those
 * of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3785 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3786 const unsigned char* in, int *inlen) {
3787 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
/*
 * UTF8ToISO8859_4:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_4
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3789 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3790 const unsigned char* in, int *inlen) {
3791 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
/*
 * ISO8859_5ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_5 and
 * returns its result as-is. Argument and return-value semantics are those
 * of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3794 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3795 const unsigned char* in, int *inlen) {
3796 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
/*
 * UTF8ToISO8859_5:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_5
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3798 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3799 const unsigned char* in, int *inlen) {
3800 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
/*
 * ISO8859_6ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_6 and
 * returns its result as-is. Argument and return-value semantics are those
 * of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3803 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3804 const unsigned char* in, int *inlen) {
3805 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
/*
 * UTF8ToISO8859_6:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_6
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3807 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3808 const unsigned char* in, int *inlen) {
3809 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
/*
 * ISO8859_7ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_7 and
 * returns its result as-is. Argument and return-value semantics are those
 * of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3812 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3813 const unsigned char* in, int *inlen) {
3814 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
/*
 * UTF8ToISO8859_7:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_7
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3816 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3817 const unsigned char* in, int *inlen) {
3818 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
/*
 * ISO8859_8ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_8 and
 * returns its result as-is. Argument and return-value semantics are those
 * of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3821 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3822 const unsigned char* in, int *inlen) {
3823 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
/*
 * UTF8ToISO8859_8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_8
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3825 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3826 const unsigned char* in, int *inlen) {
3827 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
/*
 * ISO8859_9ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_9 and
 * returns its result as-is. Argument and return-value semantics are those
 * of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3830 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3831 const unsigned char* in, int *inlen) {
3832 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
/*
 * UTF8ToISO8859_9:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_9
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3834 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3835 const unsigned char* in, int *inlen) {
3836 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
/*
 * ISO8859_10ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_10
 * and returns its result as-is. Argument and return-value semantics are
 * those of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3839 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3840 const unsigned char* in, int *inlen) {
3841 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
/*
 * UTF8ToISO8859_10:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_10
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3843 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3844 const unsigned char* in, int *inlen) {
3845 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
/*
 * ISO8859_11ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_11
 * and returns its result as-is. Argument and return-value semantics are
 * those of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3848 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3849 const unsigned char* in, int *inlen) {
3850 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
/*
 * UTF8ToISO8859_11:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_11
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3852 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3853 const unsigned char* in, int *inlen) {
3854 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
/*
 * ISO8859_13ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_13
 * and returns its result as-is. Argument and return-value semantics are
 * those of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3857 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3858 const unsigned char* in, int *inlen) {
3859 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
/*
 * UTF8ToISO8859_13:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_13
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3861 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3862 const unsigned char* in, int *inlen) {
3863 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
/*
 * ISO8859_14ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_14
 * and returns its result as-is. Argument and return-value semantics are
 * those of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3866 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3867 const unsigned char* in, int *inlen) {
3868 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
/*
 * UTF8ToISO8859_14:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_14
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3870 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3871 const unsigned char* in, int *inlen) {
3872 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
/*
 * ISO8859_15ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_15
 * and returns its result as-is. Argument and return-value semantics are
 * those of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3875 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3876 const unsigned char* in, int *inlen) {
3877 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
/*
 * UTF8ToISO8859_15:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_15
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3879 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3880 const unsigned char* in, int *inlen) {
3881 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
/*
 * ISO8859_16ToUTF8:
 * @out, @outlen, @in, @inlen: forwarded unchanged to ISO8859xToUTF8
 *
 * Binds the shared ISO8859xToUTF8 routine to xmlunicodetable_ISO8859_16
 * and returns its result as-is. Argument and return-value semantics are
 * those of ISO8859xToUTF8, which is defined outside this excerpt.
 */
3884 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3885 const unsigned char* in, int *inlen) {
3886 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
/*
 * UTF8ToISO8859_16:
 * @out, @outlen, @in, @inlen: forwarded unchanged to UTF8ToISO8859x
 *
 * Binds the shared UTF8ToISO8859x routine to xmltranscodetable_ISO8859_16
 * and returns its result as-is. Argument and return-value semantics are
 * those of UTF8ToISO8859x, which is defined outside this excerpt.
 */
3888 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3889 const unsigned char* in, int *inlen) {
3890 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
/*
 * xmlRegisterCharEncodingHandlersISO8859x:
 *
 * Calls xmlNewCharEncodingHandler once for each table-driven charset
 * handled in this section: "ISO-8859-2" through "ISO-8859-11" and
 * "ISO-8859-13" through "ISO-8859-16" (there is no ISO-8859-12 entry).
 * Each call passes the charset name together with that charset's
 * ISO8859_NToUTF8 and UTF8ToISO8859_N wrappers, in that order. The values
 * returned by xmlNewCharEncodingHandler are not used here.
 */
3894 xmlRegisterCharEncodingHandlersISO8859x (void) {
3895 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3896 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3897 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3898 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3899 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3900 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3901 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3902 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3903 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3904 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3905 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3906 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3907 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3908 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3914 #define bottom_encoding
3915 #include "elfgcchack.h"