2 * encoding.c : implements the encoding conversion functions needed for XML
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
16 * See Copyright for the status of this software.
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
34 #ifdef LIBXML_ICONV_ENABLED
39 #include <libxml/encoding.h>
40 #include <libxml/xmlmemory.h>
41 #ifdef LIBXML_HTML_ENABLED
42 #include <libxml/HTMLparser.h>
44 #include <libxml/globals.h>
45 #include <libxml/xmlerror.h>
47 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
50 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
51 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
52 struct _xmlCharEncodingAlias {
57 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
58 static int xmlCharEncodingAliasesNb = 0;
59 static int xmlCharEncodingAliasesMax = 0;
61 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
63 #define DEBUG_ENCODING /* Define this to get encoding traces */
66 #ifdef LIBXML_ISO8859X_ENABLED
67 static void xmlRegisterCharEncodingHandlersISO8859x (void);
71 static int xmlLittleEndian = 1;
74 * xmlEncodingErrMemory:
75 * @extra: extra information
77 * Handle an out of memory condition
80 xmlEncodingErrMemory(const char *extra)
82 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87 * @error: the error number
88 * @msg: the error message
93 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
95 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
96 XML_FROM_I18N, error, XML_ERR_FATAL,
97 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
100 #ifdef LIBXML_ICU_ENABLED
102 openIcuConverter(const char* name, int toUnicode)
104 UErrorCode status = U_ZERO_ERROR;
105 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
109 conv->uconv = ucnv_open(name, &status);
110 if (U_FAILURE(status))
113 status = U_ZERO_ERROR;
115 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
116 NULL, NULL, NULL, &status);
119 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
120 NULL, NULL, NULL, &status);
122 if (U_FAILURE(status))
125 status = U_ZERO_ERROR;
126 conv->utf8 = ucnv_open("UTF-8", &status);
127 if (U_SUCCESS(status))
132 ucnv_close(conv->uconv);
138 closeIcuConverter(uconv_t *conv)
141 ucnv_close(conv->uconv);
142 ucnv_close(conv->utf8);
146 #endif /* LIBXML_ICU_ENABLED */
148 /************************************************************************
150 * Conversions To/From UTF8 encoding *
152 ************************************************************************/
156 * @out: a pointer to an array of bytes to store the result
157 * @outlen: the length of @out
158 * @in: a pointer to an array of ASCII chars
159 * @inlen: the length of @in
161 * Take a block of ASCII chars in and try to convert it to an UTF-8
162 * block of chars out.
163 * Returns 0 if success, or -1 otherwise
164 * The value of @inlen after return is the number of octets consumed
165 * if the return value is positive, else unpredictable.
166 * The value of @outlen after return is the number of octets produced.
/*
 * asciiToUTF8 (fragment): walks the input and output buffers and reports
 * progress through *outlen and *inlen.
 * NOTE(review): this listing omits several original lines (the embedded
 * line numbers jump), so only the visible statements are annotated.
 */
169 asciiToUTF8(unsigned char* out, int *outlen,
170 const unsigned char* in, int *inlen) {
171 unsigned char* outstart = out;
172 const unsigned char* base = in;
173 const unsigned char* processed = in;
/* NOTE(review): *outlen is dereferenced here, in an initializer, so no NULL
 * check on outlen can run before it -- confirm callers never pass NULL. */
174 unsigned char* outend = out + *outlen;
175 const unsigned char* inend;
178 inend = in + (*inlen);
/* Continue only while input remains and at least six output bytes are free
 * (written + 5 < *outlen). */
179 while ((in < inend) && (out - outstart + 5 < *outlen)) {
/* Report bytes written and input bytes consumed up to `processed`. */
187 *outlen = out - outstart;
188 *inlen = processed - base;
/* Everything before `in` has now been handled. */
192 processed = (const unsigned char*) in;
/* Final progress report, in the same units as above. */
194 *outlen = out - outstart;
195 *inlen = processed - base;
199 #ifdef LIBXML_OUTPUT_ENABLED
202 * @out: a pointer to an array of bytes to store the result
203 * @outlen: the length of @out
204 * @in: a pointer to an array of UTF-8 chars
205 * @inlen: the length of @in
207 * Take a block of UTF-8 chars in and try to convert it to an ASCII
208 * block of chars out.
210 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
211 * The value of @inlen after return is the number of octets consumed
212 * if the return value is positive, else unpredictable.
213 * The value of @outlen after return is the number of octets produced.
216 UTF8Toascii(unsigned char* out, int *outlen,
217 const unsigned char* in, int *inlen) {
218 const unsigned char* processed = in;
219 const unsigned char* outend;
220 const unsigned char* outstart = out;
221 const unsigned char* instart = in;
222 const unsigned char* inend;
226 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
229 * initialization nothing to do
235 inend = in + (*inlen);
236 outend = out + (*outlen);
239 if (d < 0x80) { c= d; trailing= 0; }
241 /* trailing byte in leading position */
242 *outlen = out - outstart;
243 *inlen = processed - instart;
245 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
246 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
247 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
249 /* no chance for this in Ascii */
250 *outlen = out - outstart;
251 *inlen = processed - instart;
255 if (inend - in < trailing) {
259 for ( ; trailing; trailing--) {
260 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
266 /* assertion: c is a single UCS-4 value */
272 /* no chance for this in Ascii */
273 *outlen = out - outstart;
274 *inlen = processed - instart;
279 *outlen = out - outstart;
280 *inlen = processed - instart;
283 #endif /* LIBXML_OUTPUT_ENABLED */
287 * @out: a pointer to an array of bytes to store the result
288 * @outlen: the length of @out
289 * @in: a pointer to an array of ISO Latin 1 chars
290 * @inlen: the length of @in
292 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
293 * block of chars out.
294 * Returns the number of bytes written if success, or -1 otherwise
295 * The value of @inlen after return is the number of octets consumed
296 * if the return value is positive, else unpredictable.
297 * The value of @outlen after return is the number of octets produced.
300 isolat1ToUTF8(unsigned char* out, int *outlen,
301 const unsigned char* in, int *inlen) {
302 unsigned char* outstart = out;
303 const unsigned char* base = in;
304 unsigned char* outend;
305 const unsigned char* inend;
306 const unsigned char* instop;
308 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
311 outend = out + *outlen;
312 inend = in + (*inlen);
315 while ((in < inend) && (out < outend - 1)) {
317 *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
318 *out++ = ((*in) & 0x3F) | 0x80;
321 if ((instop - in) > (outend - out)) instop = in + (outend - out);
322 while ((in < instop) && (*in < 0x80)) {
326 if ((in < inend) && (out < outend) && (*in < 0x80)) {
329 *outlen = out - outstart;
336 * @out: a pointer to an array of bytes to store the result
337 * @outlen: the length of @out
338 * @inb: a pointer to an array of UTF-8 chars
339 * @inlenb: the length of @in in UTF-8 chars
341 * No op copy operation for UTF8 handling.
343 * Returns the number of bytes written, or -1 if lack of space.
344 * The value of *inlen after return is the number of octets consumed
345 * if the return value is positive, else unpredictable.
348 UTF8ToUTF8(unsigned char* out, int *outlen,
349 const unsigned char* inb, int *inlenb)
353 if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
355 if (*outlen > *inlenb) {
363 memcpy(out, inb, len);
371 #ifdef LIBXML_OUTPUT_ENABLED
374 * @out: a pointer to an array of bytes to store the result
375 * @outlen: the length of @out
376 * @in: a pointer to an array of UTF-8 chars
377 * @inlen: the length of @in
379 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
380 * block of chars out.
382 * Returns the number of bytes written if success, -2 if the transcoding fails,
384 * The value of @inlen after return is the number of octets consumed
385 * if the return value is positive, else unpredictable.
386 * The value of @outlen after return is the number of octets produced.
389 UTF8Toisolat1(unsigned char* out, int *outlen,
390 const unsigned char* in, int *inlen) {
391 const unsigned char* processed = in;
392 const unsigned char* outend;
393 const unsigned char* outstart = out;
394 const unsigned char* instart = in;
395 const unsigned char* inend;
399 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
402 * initialization nothing to do
408 inend = in + (*inlen);
409 outend = out + (*outlen);
412 if (d < 0x80) { c= d; trailing= 0; }
414 /* trailing byte in leading position */
415 *outlen = out - outstart;
416 *inlen = processed - instart;
418 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
419 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
420 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
422 /* no chance for this in IsoLat1 */
423 *outlen = out - outstart;
424 *inlen = processed - instart;
428 if (inend - in < trailing) {
432 for ( ; trailing; trailing--) {
435 if (((d= *in++) & 0xC0) != 0x80) {
436 *outlen = out - outstart;
437 *inlen = processed - instart;
444 /* assertion: c is a single UCS-4 value */
450 /* no chance for this in IsoLat1 */
451 *outlen = out - outstart;
452 *inlen = processed - instart;
457 *outlen = out - outstart;
458 *inlen = processed - instart;
461 #endif /* LIBXML_OUTPUT_ENABLED */
465 * @out: a pointer to an array of bytes to store the result
466 * @outlen: the length of @out
467 * @inb: a pointer to an array of UTF-16LE passed as a byte array
468 * @inlenb: the length of @inb in bytes
470 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
471 * block of chars out. This function assumes the endian property
472 * is the same between the native type of this machine and the
475 * Returns the number of bytes written, or -1 if lack of space, or -2
476 * if the transcoding fails (if *in is not a valid utf16 string)
477 * The value of *inlen after return is the number of octets consumed
478 * if the return value is positive, else unpredictable.
/*
 * UTF16LEToUTF8 (fragment): reads 16-bit code units from @inb, combines
 * surrogate pairs, and writes the UTF-8 byte sequence to @out.
 * NOTE(review): this listing omits several original lines (the embedded
 * line numbers jump), so only the visible statements are annotated.
 */
481 UTF16LEToUTF8(unsigned char* out, int *outlen,
482 const unsigned char* inb, int *inlenb)
484 unsigned char* outstart = out;
485 const unsigned char* processed = inb;
/* NOTE(review): *outlen is dereferenced in this initializer, before any
 * NULL check could run -- confirm callers never pass NULL. */
486 unsigned char* outend = out + *outlen;
/* NOTE(review): the cast drops const and views the byte buffer as
 * unsigned short; presumably the little-endian branch reads through it
 * directly, which would assume @inb is 2-byte aligned -- confirm. */
487 unsigned short* in = (unsigned short*) inb;
488 unsigned short* inend;
489 unsigned int c, d, inlen;
/* Odd byte count: the input does not end on a 16-bit boundary. */
493 if ((*inlenb % 2) == 1)
/* Same six-free-bytes margin on the output as the other converters. */
497 while ((in < inend) && (out - outstart + 5 < *outlen)) {
498 if (xmlLittleEndian) {
/* Other hosts: build the code unit bytewise; this byte supplies bits 8-15. */
501 tmp = (unsigned char *) in;
503 c = c | (((unsigned int)*tmp) << 8);
/* 0xD800-0xDBFF: first half of a surrogate pair, a second unit is needed. */
506 if ((c & 0xFC00) == 0xD800) { /* surrogates */
507 if (in >= inend) { /* (in > inend) shouldn't happen */
510 if (xmlLittleEndian) {
513 tmp = (unsigned char *) in;
515 d = d | (((unsigned int)*tmp) << 8);
/* 0xDC00-0xDFFF: valid second half of the pair. */
518 if ((d & 0xFC00) == 0xDC00) {
/* Report bytes written and input bytes consumed up to `processed`. */
525 *outlen = out - outstart;
526 *inlenb = processed - inb;
531 /* assertion: c is a single UCS-4 value */
/* Emit the UTF-8 lead byte; `bits` is the shift for the first continuation
 * byte (-6 means none follow). */
534 if (c < 0x80) { *out++= c; bits= -6; }
535 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
536 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
537 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
/* Continuation bytes: six payload bits each, tagged 10xxxxxx. */
539 for ( ; bits >= 0; bits-= 6) {
542 *out++= ((c >> bits) & 0x3F) | 0x80;
/* Everything before `in` has now been converted. */
544 processed = (const unsigned char*) in;
546 *outlen = out - outstart;
547 *inlenb = processed - inb;
551 #ifdef LIBXML_OUTPUT_ENABLED
554 * @outb: a pointer to an array of bytes to store the result
555 * @outlen: the length of @outb
556 * @in: a pointer to an array of UTF-8 chars
557 * @inlen: the length of @in
559 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
560 * block of chars out.
562 * Returns the number of bytes written, or -1 if lack of space, or -2
563 * if the transcoding failed.
566 UTF8ToUTF16LE(unsigned char* outb, int *outlen,
567 const unsigned char* in, int *inlen)
569 unsigned short* out = (unsigned short*) outb;
570 const unsigned char* processed = in;
571 const unsigned char *const instart = in;
572 unsigned short* outstart= out;
573 unsigned short* outend;
574 const unsigned char* inend;
578 unsigned short tmp1, tmp2;
580 /* UTF16LE encoding has no BOM */
581 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
588 outend = out + (*outlen / 2);
591 if (d < 0x80) { c= d; trailing= 0; }
593 /* trailing byte in leading position */
594 *outlen = (out - outstart) * 2;
595 *inlen = processed - instart;
597 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
598 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
599 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
601 /* no chance for this in UTF-16 */
602 *outlen = (out - outstart) * 2;
603 *inlen = processed - instart;
607 if (inend - in < trailing) {
611 for ( ; trailing; trailing--) {
612 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
618 /* assertion: c is a single UCS-4 value */
622 if (xmlLittleEndian) {
625 tmp = (unsigned char *) out;
627 *(tmp + 1) = c >> 8 ;
631 else if (c < 0x110000) {
635 if (xmlLittleEndian) {
636 *out++ = 0xD800 | (c >> 10);
637 *out++ = 0xDC00 | (c & 0x03FF);
639 tmp1 = 0xD800 | (c >> 10);
640 tmp = (unsigned char *) out;
641 *tmp = (unsigned char) tmp1;
642 *(tmp + 1) = tmp1 >> 8;
645 tmp2 = 0xDC00 | (c & 0x03FF);
646 tmp = (unsigned char *) out;
647 *tmp = (unsigned char) tmp2;
648 *(tmp + 1) = tmp2 >> 8;
656 *outlen = (out - outstart) * 2;
657 *inlen = processed - instart;
663 * @outb: a pointer to an array of bytes to store the result
664 * @outlen: the length of @outb
665 * @in: a pointer to an array of UTF-8 chars
666 * @inlen: the length of @in
668 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
669 * block of chars out.
671 * Returns the number of bytes written, or -1 if lack of space, or -2
672 * if the transcoding failed.
675 UTF8ToUTF16(unsigned char* outb, int *outlen,
676 const unsigned char* in, int *inlen)
680 * initialization, add the Byte Order Mark for UTF-16LE
687 #ifdef DEBUG_ENCODING
688 xmlGenericError(xmlGenericErrorContext,
689 "Added FFFE Byte Order Mark\n");
697 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
699 #endif /* LIBXML_OUTPUT_ENABLED */
703 * @out: a pointer to an array of bytes to store the result
704 * @outlen: the length of @out
705 * @inb: a pointer to an array of UTF-16 passed as a byte array
706 * @inlenb: the length of @inb in bytes
708 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
709 * block of chars out. This function assumes the endian property
710 * is the same between the native type of this machine and the
713 * Returns the number of bytes written, or -1 if lack of space, or -2
714 * if the transcoding fails (if *in is not a valid utf16 string)
715 * The value of *inlen after return is the number of octets consumed
716 * if the return value is positive, else unpredictable.
/*
 * UTF16BEToUTF8 (fragment): reads 16-bit code units from @inb, combines
 * surrogate pairs, and writes the UTF-8 byte sequence to @out.
 * NOTE(review): this listing omits several original lines (the embedded
 * line numbers jump), so only the visible statements are annotated.
 */
719 UTF16BEToUTF8(unsigned char* out, int *outlen,
720 const unsigned char* inb, int *inlenb)
722 unsigned char* outstart = out;
723 const unsigned char* processed = inb;
/* NOTE(review): *outlen is dereferenced in this initializer, before any
 * NULL check could run -- confirm callers never pass NULL. */
724 unsigned char* outend = out + *outlen;
/* NOTE(review): the cast drops const and views the byte buffer as
 * unsigned short; presumably the big-endian-host branch reads through it
 * directly, which would assume @inb is 2-byte aligned -- confirm. */
725 unsigned short* in = (unsigned short*) inb;
726 unsigned short* inend;
727 unsigned int c, d, inlen;
/* Odd byte count: the input does not end on a 16-bit boundary. */
731 if ((*inlenb % 2) == 1)
/* Little-endian host: build the big-endian code unit bytewise; this byte
 * supplies the low 8 bits. */
736 if (xmlLittleEndian) {
737 tmp = (unsigned char *) in;
740 c = c | (unsigned int) *tmp;
/* 0xD800-0xDBFF: first half of a surrogate pair, a second unit is needed. */
745 if ((c & 0xFC00) == 0xD800) { /* surrogates */
746 if (in >= inend) { /* (in > inend) shouldn't happen */
/* Report bytes written and input bytes consumed up to `processed`. */
747 *outlen = out - outstart;
748 *inlenb = processed - inb;
751 if (xmlLittleEndian) {
752 tmp = (unsigned char *) in;
755 d = d | (unsigned int) *tmp;
/* 0xDC00-0xDFFF: valid second half of the pair. */
760 if ((d & 0xFC00) == 0xDC00) {
767 *outlen = out - outstart;
768 *inlenb = processed - inb;
773 /* assertion: c is a single UCS-4 value */
/* Emit the UTF-8 lead byte; `bits` is the shift for the first continuation
 * byte (-6 means none follow). */
776 if (c < 0x80) { *out++= c; bits= -6; }
777 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
778 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
779 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
/* Continuation bytes: six payload bits each, tagged 10xxxxxx. */
781 for ( ; bits >= 0; bits-= 6) {
784 *out++= ((c >> bits) & 0x3F) | 0x80;
/* Everything before `in` has now been converted. */
786 processed = (const unsigned char*) in;
788 *outlen = out - outstart;
789 *inlenb = processed - inb;
793 #ifdef LIBXML_OUTPUT_ENABLED
796 * @outb: a pointer to an array of bytes to store the result
797 * @outlen: the length of @outb
798 * @in: a pointer to an array of UTF-8 chars
799 * @inlen: the length of @in
801 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
802 * block of chars out.
804 * Returns the number of bytes written, or -1 if lack of space, or -2
805 * if the transcoding failed.
/*
 * UTF8ToUTF16BE (fragment): decodes UTF-8 from @in and stores big-endian
 * 16-bit code units (surrogate pairs above 0xFFFF) into @outb.
 * NOTE(review): this listing omits several original lines (the embedded
 * line numbers jump), so only the visible statements are annotated.
 */
808 UTF8ToUTF16BE(unsigned char* outb, int *outlen,
809 const unsigned char* in, int *inlen)
/* The output is addressed as 16-bit units; pointer differences on `out`
 * therefore count units, not bytes. */
811 unsigned short* out = (unsigned short*) outb;
812 const unsigned char* processed = in;
813 const unsigned char *const instart = in;
814 unsigned short* outstart= out;
815 unsigned short* outend;
816 const unsigned char* inend;
820 unsigned short tmp1, tmp2;
822 /* UTF-16BE has no BOM */
823 if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
/* *outlen is a byte count; halve it to get the capacity in 16-bit units. */
830 outend = out + (*outlen / 2);
/* Classify the lead byte: payload bits go to c, `trailing` is the number of
 * continuation bytes expected. */
833 if (d < 0x80) { c= d; trailing= 0; }
835 /* trailing byte in leading position */
/* NOTE(review): this stores a count of 16-bit units, while the normal exit
 * at the end of this function and the matching error paths in
 * UTF8ToUTF16LE store (out - outstart) * 2, i.e. bytes. Looks like a
 * missing "* 2" -- confirm and fix. */
836 *outlen = out - outstart;
837 *inlen = processed - instart;
839 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
840 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
841 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
843 /* no chance for this in UTF-16 */
/* NOTE(review): same unit mismatch as above (units instead of bytes). */
844 *outlen = out - outstart;
845 *inlen = processed - instart;
/* Not enough input left to hold the expected continuation bytes. */
849 if (inend - in < trailing) {
853 for ( ; trailing; trailing--) {
/* Stop on end of input or on a byte that is not of the form 10xxxxxx. */
854 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break;
859 /* assertion: c is a single UCS-4 value */
/* One 16-bit unit is needed here; stop if the output is full. */
861 if (out >= outend) break;
862 if (xmlLittleEndian) {
863 tmp = (unsigned char *) out;
871 else if (c < 0x110000) {
/* A surrogate pair needs two 16-bit units. */
872 if (out+1 >= outend) break;
874 if (xmlLittleEndian) {
/* Little-endian host: store each surrogate bytewise; the low 8 bits go to
 * the second byte of the unit. */
875 tmp1 = 0xD800 | (c >> 10);
876 tmp = (unsigned char *) out;
878 *(tmp + 1) = (unsigned char) tmp1;
881 tmp2 = 0xDC00 | (c & 0x03FF);
882 tmp = (unsigned char *) out;
884 *(tmp + 1) = (unsigned char) tmp2;
/* Other hosts: store the two surrogates directly as 16-bit values. */
887 *out++ = 0xD800 | (c >> 10);
888 *out++ = 0xDC00 | (c & 0x03FF);
/* Normal exit: units written converted to bytes. */
895 *outlen = (out - outstart) * 2;
896 *inlen = processed - instart;
899 #endif /* LIBXML_OUTPUT_ENABLED */
901 /************************************************************************
903 * Generic encoding handling routines *
905 ************************************************************************/
908 * xmlDetectCharEncoding:
909 * @in: a pointer to the first bytes of the XML entity, must be at least
910 * 2 bytes long (at least 4 if encoding is UCS4 variant).
911 * @len: the length of the buffer
913 * Guess the encoding of the entity using the first bytes of the entity content
914 * according to the non-normative appendix F of the XML-1.0 recommendation.
916 * Returns one of the XML_CHAR_ENCODING_... values.
919 xmlDetectCharEncoding(const unsigned char* in, int len)
922 return(XML_CHAR_ENCODING_NONE);
924 if ((in[0] == 0x00) && (in[1] == 0x00) &&
925 (in[2] == 0x00) && (in[3] == 0x3C))
926 return(XML_CHAR_ENCODING_UCS4BE);
927 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
928 (in[2] == 0x00) && (in[3] == 0x00))
929 return(XML_CHAR_ENCODING_UCS4LE);
930 if ((in[0] == 0x00) && (in[1] == 0x00) &&
931 (in[2] == 0x3C) && (in[3] == 0x00))
932 return(XML_CHAR_ENCODING_UCS4_2143);
933 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
934 (in[2] == 0x00) && (in[3] == 0x00))
935 return(XML_CHAR_ENCODING_UCS4_3412);
936 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
937 (in[2] == 0xA7) && (in[3] == 0x94))
938 return(XML_CHAR_ENCODING_EBCDIC);
939 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
940 (in[2] == 0x78) && (in[3] == 0x6D))
941 return(XML_CHAR_ENCODING_UTF8);
943 * Although not part of the recommendation, we also
944 * attempt an "auto-recognition" of UTF-16LE and
945 * UTF-16BE encodings.
947 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
948 (in[2] == 0x3F) && (in[3] == 0x00))
949 return(XML_CHAR_ENCODING_UTF16LE);
950 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
951 (in[2] == 0x00) && (in[3] == 0x3F))
952 return(XML_CHAR_ENCODING_UTF16BE);
956 * Errata on XML-1.0 June 20 2001
957 * We now allow an UTF8 encoded BOM
959 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
961 return(XML_CHAR_ENCODING_UTF8);
963 /* For UTF-16 we can recognize by the BOM */
965 if ((in[0] == 0xFE) && (in[1] == 0xFF))
966 return(XML_CHAR_ENCODING_UTF16BE);
967 if ((in[0] == 0xFF) && (in[1] == 0xFE))
968 return(XML_CHAR_ENCODING_UTF16LE);
970 return(XML_CHAR_ENCODING_NONE);
974 * xmlCleanupEncodingAliases:
976 * Unregisters all aliases
979 xmlCleanupEncodingAliases(void) {
982 if (xmlCharEncodingAliases == NULL)
985 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
986 if (xmlCharEncodingAliases[i].name != NULL)
987 xmlFree((char *) xmlCharEncodingAliases[i].name);
988 if (xmlCharEncodingAliases[i].alias != NULL)
989 xmlFree((char *) xmlCharEncodingAliases[i].alias);
991 xmlCharEncodingAliasesNb = 0;
992 xmlCharEncodingAliasesMax = 0;
993 xmlFree(xmlCharEncodingAliases);
994 xmlCharEncodingAliases = NULL;
998 * xmlGetEncodingAlias:
999 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1001 * Lookup an encoding name for the given alias.
1003 * Returns NULL if not found, otherwise the original name
/*
 * xmlGetEncodingAlias (fragment): upper-cases @alias into a local buffer and
 * searches the global alias table for an exact match.
 * NOTE(review): this listing omits several original lines (the embedded
 * line numbers jump), so only the visible statements are annotated.
 */
1006 xmlGetEncodingAlias(const char *alias) {
/* No alias has been registered yet. */
1013 if (xmlCharEncodingAliases == NULL)
/* At most 99 characters are copied; the copy stops after the terminator. */
1016 for (i = 0;i < 99;i++) {
/* NOTE(review): alias[i] is a plain char; passing a negative value to
 * toupper() is undefined, so a cast to unsigned char would be safer. */
1017 upper[i] = toupper(alias[i]);
1018 if (upper[i] == 0) break;
1023 * Walk down the list looking for a definition of the alias
1025 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1026 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
/* Returns the table's own string, not a copy. */
1027 return(xmlCharEncodingAliases[i].name);
1034 * xmlAddEncodingAlias:
1035 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1036 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1038 * Registers an alias @alias for an encoding named @name. Existing alias
1039 * will be overwritten.
1041 * Returns 0 in case of success, -1 in case of error
/*
 * xmlAddEncodingAlias (fragment): upper-cases @alias, makes room in the
 * global alias table, then either replaces the name of an existing entry or
 * appends a new (name, alias) pair.
 * NOTE(review): this listing omits several original lines (the embedded
 * line numbers jump), so only the visible statements are annotated.
 */
1044 xmlAddEncodingAlias(const char *name, const char *alias) {
1048 if ((name == NULL) || (alias == NULL))
/* At most 99 characters are copied; the copy stops after the terminator. */
1051 for (i = 0;i < 99;i++) {
/* NOTE(review): alias[i] is a plain char; passing a negative value to
 * toupper() is undefined, so a cast to unsigned char would be safer. */
1052 upper[i] = toupper(alias[i]);
1053 if (upper[i] == 0) break;
/* First registration: allocate room for 20 entries. */
1057 if (xmlCharEncodingAliases == NULL) {
1058 xmlCharEncodingAliasesNb = 0;
1059 xmlCharEncodingAliasesMax = 20;
1060 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1061 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1062 if (xmlCharEncodingAliases == NULL)
/* Table full: double the capacity. This runs before the lookup below, so
 * the table can grow even when the call only replaces an entry. */
1064 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
/* NOTE(review): the capacity is doubled before the reallocation is known
 * to have succeeded, and the xmlRealloc result overwrites the only pointer
 * to the table. No failure check is visible here; on failure the old table
 * would leak and later accesses would use NULL -- confirm and use a
 * temporary. */
1065 xmlCharEncodingAliasesMax *= 2;
1066 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1067 xmlRealloc(xmlCharEncodingAliases,
1068 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1071 * Walk down the list looking for a definition of the alias
1073 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1074 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1076 * Replace the definition.
/* NOTE(review): the xmlMemStrdup results in this function are stored
 * without a visible NULL check -- confirm. */
1078 xmlFree((char *) xmlCharEncodingAliases[i].name);
1079 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1084 * Add the definition
/* The alias is stored in its upper-cased form. */
1086 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1087 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1088 xmlCharEncodingAliasesNb++;
1093 * xmlDelEncodingAlias:
1094 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1096 * Unregisters an encoding alias @alias
1098 * Returns 0 in case of success, -1 in case of error
/*
 * xmlDelEncodingAlias (fragment): finds @alias in the global alias table,
 * frees its strings and closes the gap in the array.
 * NOTE(review): this listing omits several original lines (the embedded
 * line numbers jump), so only the visible statements are annotated.
 */
1101 xmlDelEncodingAlias(const char *alias) {
1107 if (xmlCharEncodingAliases == NULL)
1110 * Walk down the list looking for a definition of the alias
1112 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
/* NOTE(review): @alias is compared as given, but xmlAddEncodingAlias stores
 * aliases upper-cased and xmlGetEncodingAlias upper-cases before comparing.
 * A lower- or mixed-case @alias will therefore not match here -- confirm
 * whether the same upper-casing is wanted. */
1113 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1114 xmlFree((char *) xmlCharEncodingAliases[i].name);
1115 xmlFree((char *) xmlCharEncodingAliases[i].alias);
/* The count is decremented first, so (count - i) is exactly the number of
 * entries that follow slot i and must move down by one. */
1116 xmlCharEncodingAliasesNb--;
1117 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1118 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1126 * xmlParseCharEncoding:
1127 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1129 * Compare the string to the encoding schemes already known. Note
1130 * that the comparison is case insensitive according to section
1131 * [XML] 4.3.3 Character Encoding in Entities.
1133 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1134 * if not recognized.
/*
 * xmlParseCharEncoding (fragment): resolves aliases, upper-cases @name and
 * maps it to an xmlCharEncoding value through a chain of exact string
 * comparisons.
 * NOTE(review): this listing omits several original lines (the embedded
 * line numbers jump), so only the visible statements are annotated.
 */
1137 xmlParseCharEncoding(const char* name)
1144 return(XML_CHAR_ENCODING_NONE);
1147 * Do the alias resolution
1149 alias = xmlGetEncodingAlias(name);
/* At most 499 characters are copied; the copy stops after the terminator. */
1153 for (i = 0;i < 499;i++) {
/* NOTE(review): name[i] is a plain char; passing a negative value to
 * toupper() is undefined, so a cast to unsigned char would be safer. */
1154 upper[i] = toupper(name[i]);
1155 if (upper[i] == 0) break;
/* An empty name is reported as "no encoding", not as an error. */
1159 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1160 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1161 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1164 * NOTE: if we were able to parse this, the endianness of UTF16 is
1165 * already found and in use
/* A generic UTF-16 name is reported as the LE variant. */
1167 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1168 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1170 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1171 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1172 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1175 * NOTE: if we were able to parse this, the endianness of UCS4 is
1176 * already found and in use
/* A generic UCS-4 name is reported as the LE variant. */
1178 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1179 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1180 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1183 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1184 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1185 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1187 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1188 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1189 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1191 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1192 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1193 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1194 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1195 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1196 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1197 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1199 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
/* NOTE(review): only the underscore spelling is accepted here, while
 * xmlGetCharEncodingName returns "Shift-JIS" (hyphen) for this encoding;
 * that name does not match this comparison -- confirm which spelling is
 * intended. */
1200 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1201 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1203 #ifdef DEBUG_ENCODING
1204 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
/* No comparison matched. */
1206 return(XML_CHAR_ENCODING_ERROR);
1210 * xmlGetCharEncodingName:
1211 * @enc: the encoding
1213 * The "canonical" name for XML encoding.
1214 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1215 * Section 4.3.3 Character Encoding in Entities
1217 * Returns the canonical name for the given encoding
/*
 * xmlGetCharEncodingName (fragment): maps an xmlCharEncoding value to a
 * constant name string.
 * NOTE(review): this listing omits several original lines (the embedded
 * line numbers jump), including some return statements, so only the visible
 * statements are annotated.
 */
1221 xmlGetCharEncodingName(xmlCharEncoding enc) {
1223 case XML_CHAR_ENCODING_ERROR:
1225 case XML_CHAR_ENCODING_NONE:
1227 case XML_CHAR_ENCODING_UTF8:
1229 case XML_CHAR_ENCODING_UTF16LE:
1231 case XML_CHAR_ENCODING_UTF16BE:
1233 case XML_CHAR_ENCODING_EBCDIC:
/* All four UCS-4 byte orders share one name. */
1235 case XML_CHAR_ENCODING_UCS4LE:
1236 return("ISO-10646-UCS-4");
1237 case XML_CHAR_ENCODING_UCS4BE:
1238 return("ISO-10646-UCS-4");
1239 case XML_CHAR_ENCODING_UCS4_2143:
1240 return("ISO-10646-UCS-4");
1241 case XML_CHAR_ENCODING_UCS4_3412:
1242 return("ISO-10646-UCS-4");
1243 case XML_CHAR_ENCODING_UCS2:
1244 return("ISO-10646-UCS-2");
1245 case XML_CHAR_ENCODING_8859_1:
1246 return("ISO-8859-1");
1247 case XML_CHAR_ENCODING_8859_2:
1248 return("ISO-8859-2");
1249 case XML_CHAR_ENCODING_8859_3:
1250 return("ISO-8859-3");
1251 case XML_CHAR_ENCODING_8859_4:
1252 return("ISO-8859-4");
1253 case XML_CHAR_ENCODING_8859_5:
1254 return("ISO-8859-5");
1255 case XML_CHAR_ENCODING_8859_6:
1256 return("ISO-8859-6");
1257 case XML_CHAR_ENCODING_8859_7:
1258 return("ISO-8859-7");
1259 case XML_CHAR_ENCODING_8859_8:
1260 return("ISO-8859-8");
1261 case XML_CHAR_ENCODING_8859_9:
1262 return("ISO-8859-9");
1263 case XML_CHAR_ENCODING_2022_JP:
1264 return("ISO-2022-JP");
/* NOTE(review): hyphenated spelling. xmlParseCharEncoding only accepts
 * "SHIFT_JIS" (underscore) after upper-casing, so this name does not match
 * that comparison; the IANA registry spelling is "Shift_JIS" -- confirm
 * before changing, since callers may depend on the current string. */
1265 case XML_CHAR_ENCODING_SHIFT_JIS:
1266 return("Shift-JIS");
1267 case XML_CHAR_ENCODING_EUC_JP:
1269 case XML_CHAR_ENCODING_ASCII:
1275 /************************************************************************
1277 * Char encoding handlers *
1279 ************************************************************************/
1282 /* the size should be growable, but it's not a big deal ... */
1283 #define MAX_ENCODING_HANDLERS 50
1284 static xmlCharEncodingHandlerPtr *handlers = NULL;
1285 static int nbCharEncodingHandler = 0;
1288 * The default is UTF-8 for XML, that's also the default used for the
1289 * parser internals, so the default encoding handler is NULL
1292 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1295 * xmlNewCharEncodingHandler:
1296 * @name: the encoding name, in UTF-8 format (ASCII actually)
1297 * @input: the xmlCharEncodingInputFunc to read that encoding
1298 * @output: the xmlCharEncodingOutputFunc to write that encoding
1300 * Creates and registers an xmlCharEncodingHandler.
1302 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
/*
 * xmlNewCharEncodingHandler (fragment): resolves aliases for @name, stores
 * an upper-cased copy of it in a freshly allocated handler together with
 * the @input and @output callbacks, and registers the handler.
 * NOTE(review): this listing omits several original lines (the embedded
 * line numbers jump), so only the visible statements are annotated.
 */
1304 xmlCharEncodingHandlerPtr
1305 xmlNewCharEncodingHandler(const char *name,
1306 xmlCharEncodingInputFunc input,
1307 xmlCharEncodingOutputFunc output) {
1308 xmlCharEncodingHandlerPtr handler;
1315 * Do the alias resolution
1317 alias = xmlGetEncodingAlias(name);
1322 * Keep only the uppercase version of the encoding.
1325 xmlEncodingErr(XML_I18N_NO_NAME,
1326 "xmlNewCharEncodingHandler : no name !\n", NULL);
/* At most 499 characters are copied; the copy stops after the terminator. */
1329 for (i = 0;i < 499;i++) {
/* NOTE(review): name[i] is a plain char; passing a negative value to
 * toupper() is undefined, so a cast to unsigned char would be safer. */
1330 upper[i] = toupper(name[i]);
1331 if (upper[i] == 0) break;
/* Heap copy of the upper-cased name. */
1334 up = xmlMemStrdup(upper);
1336 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1341 * allocate and fill-up an handler block.
1343 handler = (xmlCharEncodingHandlerPtr)
1344 xmlMalloc(sizeof(xmlCharEncodingHandler));
1345 if (handler == NULL) {
1347 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
/* Start from an all-zero handler, then fill in the callbacks. */
1350 memset(handler, 0, sizeof(xmlCharEncodingHandler));
1351 handler->input = input;
1352 handler->output = output;
/* The converter fields are reset explicitly even though the block was just
 * zeroed. */
1355 #ifdef LIBXML_ICONV_ENABLED
1356 handler->iconv_in = NULL;
1357 handler->iconv_out = NULL;
1359 #ifdef LIBXML_ICU_ENABLED
1360 handler->uconv_in = NULL;
1361 handler->uconv_out = NULL;
1365 * registers and returns the handler.
1367 xmlRegisterCharEncodingHandler(handler);
/* The trace prints @name as held at this point, not the upper-cased copy. */
1368 #ifdef DEBUG_ENCODING
1369 xmlGenericError(xmlGenericErrorContext,
1370 "Registered encoding handler for %s\n", name);
1376 * xmlInitCharEncodingHandlers:
1378 * Initialize the char encoding support, it registers the default
1379 * encoding supported.
1380 * NOTE: while public, this function usually doesn't need to be called
1381 * in normal processing.
/*
 * xmlInitCharEncodingHandlers (fragment): allocates the handler table,
 * detects host endianness and registers the built-in handlers.
 * NOTE(review): this listing omits several original lines (the embedded
 * line numbers jump), so only the visible statements are annotated.
 */
1384 xmlInitCharEncodingHandlers(void) {
/* Endianness probe: look at the first byte in memory of 0x1234. */
1385 unsigned short int tst = 0x1234;
1386 unsigned char *ptr = (unsigned char *) &tst;
/* Already initialized: nothing to do. */
1388 if (handlers != NULL) return;
/* Fixed-size table of MAX_ENCODING_HANDLERS handler pointers. */
1390 handlers = (xmlCharEncodingHandlerPtr *)
1391 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
/* High-order byte first means big-endian; low-order byte first means
 * little-endian. */
1393 if (*ptr == 0x12) xmlLittleEndian = 0;
1394 else if (*ptr == 0x34) xmlLittleEndian = 1;
1396 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1397 "Odd problem at endianness detection\n", NULL);
/* The allocation above is only checked here, after the endianness probe. */
1400 if (handlers == NULL) {
1401 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
/* UTF-8 uses the same copy routine in both directions. */
1404 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1405 #ifdef LIBXML_OUTPUT_ENABLED
1407 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1409 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
/* Generic "UTF-16" reads with the little-endian converter. */
1410 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1411 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1412 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1413 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1414 #ifdef LIBXML_HTML_ENABLED
/* Output-only handler: no input converter is supplied. */
1415 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
/* Registrations below pass NULL as the output converter. */
1419 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1421 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1422 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1423 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1424 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1425 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1426 #endif /* LIBXML_OUTPUT_ENABLED */
/* Built-in ISO-8859-x tables are used only when neither iconv nor ICU is
 * enabled. */
1427 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1428 #ifdef LIBXML_ISO8859X_ENABLED
1429 xmlRegisterCharEncodingHandlersISO8859x ();
1436 * xmlCleanupCharEncodingHandlers:
1438 * Cleanup the memory allocated for the char encoding support, it
1439 * unregisters all the encoding handlers and the aliases.
/* Releases the alias list, then every registered handler. */
1442 xmlCleanupCharEncodingHandlers(void) {
1443 xmlCleanupEncodingAliases();
/* No handler table: nothing more to release. */
1445 if (handlers == NULL) return;
/*
 * Walk the table from the last used slot down to slot 0; the counter is
 * decremented before it is used as an index.
 */
1447 for (;nbCharEncodingHandler > 0;) {
1448 nbCharEncodingHandler--;
1449 if (handlers[nbCharEncodingHandler] != NULL) {
/* The name string is freed before the handler block itself. */
1450 if (handlers[nbCharEncodingHandler]->name != NULL)
1451 xmlFree(handlers[nbCharEncodingHandler]->name);
1452 xmlFree(handlers[nbCharEncodingHandler]);
/* Reset the bookkeeping globals. */
1457 nbCharEncodingHandler = 0;
1458 xmlDefaultCharEncodingHandler = NULL;
1462 * xmlRegisterCharEncodingHandler:
1463 * @handler: the xmlCharEncodingHandlerPtr handler block
1465 * Register the char encoding handler, surprising, isn't it ?
/* Appends @handler to the global handlers table. */
1468 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
/* Lazily create the table on first use. */
1469 if (handlers == NULL) xmlInitCharEncodingHandlers();
/* Both a NULL @handler and a still-missing table are reported with the same message. */
1470 if ((handler == NULL) || (handlers == NULL)) {
1471 xmlEncodingErr(XML_I18N_NO_HANDLER,
1472 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
/* The table has a fixed capacity of MAX_ENCODING_HANDLERS entries. */
1476 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1477 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1478 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1479 "MAX_ENCODING_HANDLERS");
/* Store in the next free slot and bump the count. */
1482 handlers[nbCharEncodingHandler++] = handler;
1486 * xmlGetCharEncodingHandler:
1487 * @enc: an xmlCharEncoding value.
1489 * Search in the registered set the handler able to read/write that encoding.
1491 * Returns the handler or NULL if not found
/*
 * Maps an xmlCharEncoding value to a handler. Most cases try one or more
 * spellings of the encoding name through xmlFindCharEncodingHandler() and
 * return the first handler obtained.
 */
1493 xmlCharEncodingHandlerPtr
1494 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1495 xmlCharEncodingHandlerPtr handler;
/* Lazily create the handler table on first use. */
1497 if (handlers == NULL) xmlInitCharEncodingHandlers();
1499 case XML_CHAR_ENCODING_ERROR:
1501 case XML_CHAR_ENCODING_NONE:
1503 case XML_CHAR_ENCODING_UTF8:
/* UTF-16 LE/BE are answered from the file-level static handler pointers. */
1505 case XML_CHAR_ENCODING_UTF16LE:
1506 return(xmlUTF16LEHandler);
1507 case XML_CHAR_ENCODING_UTF16BE:
1508 return(xmlUTF16BEHandler);
/* EBCDIC: three spellings are tried in turn. */
1509 case XML_CHAR_ENCODING_EBCDIC:
1510 handler = xmlFindCharEncodingHandler("EBCDIC");
1511 if (handler != NULL) return(handler);
1512 handler = xmlFindCharEncodingHandler("ebcdic");
1513 if (handler != NULL) return(handler);
1514 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1515 if (handler != NULL) return(handler);
1517 case XML_CHAR_ENCODING_UCS4BE:
1518 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1519 if (handler != NULL) return(handler);
1520 handler = xmlFindCharEncodingHandler("UCS-4");
1521 if (handler != NULL) return(handler);
1522 handler = xmlFindCharEncodingHandler("UCS4");
1523 if (handler != NULL) return(handler);
/* NOTE(review): UCS4LE probes exactly the same three names as UCS4BE above. */
1525 case XML_CHAR_ENCODING_UCS4LE:
1526 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1527 if (handler != NULL) return(handler);
1528 handler = xmlFindCharEncodingHandler("UCS-4");
1529 if (handler != NULL) return(handler);
1530 handler = xmlFindCharEncodingHandler("UCS4");
1531 if (handler != NULL) return(handler);
/* No lookup is visible for the two unusual UCS4 byte orders. */
1533 case XML_CHAR_ENCODING_UCS4_2143:
1535 case XML_CHAR_ENCODING_UCS4_3412:
1537 case XML_CHAR_ENCODING_UCS2:
1538 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1539 if (handler != NULL) return(handler);
1540 handler = xmlFindCharEncodingHandler("UCS-2");
1541 if (handler != NULL) return(handler);
1542 handler = xmlFindCharEncodingHandler("UCS2");
1543 if (handler != NULL) return(handler);
1547 * We used to keep ISO Latin encodings native in the
1548 * generated data. This led to so many problems that
1549 * this has been removed. One can still change this
1550 * back by registering no-ops encoders for those
/* ISO-8859-1 .. ISO-8859-9: a single canonical name is looked up for each. */
1552 case XML_CHAR_ENCODING_8859_1:
1553 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1554 if (handler != NULL) return(handler);
1556 case XML_CHAR_ENCODING_8859_2:
1557 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1558 if (handler != NULL) return(handler);
1560 case XML_CHAR_ENCODING_8859_3:
1561 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1562 if (handler != NULL) return(handler);
1564 case XML_CHAR_ENCODING_8859_4:
1565 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1566 if (handler != NULL) return(handler);
1568 case XML_CHAR_ENCODING_8859_5:
1569 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1570 if (handler != NULL) return(handler);
1572 case XML_CHAR_ENCODING_8859_6:
1573 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1574 if (handler != NULL) return(handler);
1576 case XML_CHAR_ENCODING_8859_7:
1577 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1578 if (handler != NULL) return(handler);
1580 case XML_CHAR_ENCODING_8859_8:
1581 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1582 if (handler != NULL) return(handler);
1584 case XML_CHAR_ENCODING_8859_9:
1585 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1586 if (handler != NULL) return(handler);
/* Japanese encodings. */
1590 case XML_CHAR_ENCODING_2022_JP:
1591 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1592 if (handler != NULL) return(handler);
/* Shift-JIS: three spellings are tried in turn. */
1594 case XML_CHAR_ENCODING_SHIFT_JIS:
1595 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1596 if (handler != NULL) return(handler);
1597 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1598 if (handler != NULL) return(handler);
1599 handler = xmlFindCharEncodingHandler("Shift_JIS");
1600 if (handler != NULL) return(handler);
1602 case XML_CHAR_ENCODING_EUC_JP:
1603 handler = xmlFindCharEncodingHandler("EUC-JP");
1604 if (handler != NULL) return(handler);
/* Debug trace emitted when no case above returned a handler. */
1610 #ifdef DEBUG_ENCODING
1611 xmlGenericError(xmlGenericErrorContext,
1612 "No handler found for encoding %d\n", enc);
1618 * xmlFindCharEncodingHandler:
1619 * @name: a string describing the char encoding.
1621 * Search in the registered set the handler able to read/write that encoding.
1623 * Returns the handler or NULL if not found
1625 xmlCharEncodingHandlerPtr
1626 xmlFindCharEncodingHandler(const char *name) {
1629 xmlCharEncoding alias;
1630 #ifdef LIBXML_ICONV_ENABLED
1631 xmlCharEncodingHandlerPtr enc;
1632 iconv_t icv_in, icv_out;
1633 #endif /* LIBXML_ICONV_ENABLED */
1634 #ifdef LIBXML_ICU_ENABLED
1635 xmlCharEncodingHandlerPtr encu;
1636 uconv_t *ucv_in, *ucv_out;
1637 #endif /* LIBXML_ICU_ENABLED */
1641 if (handlers == NULL) xmlInitCharEncodingHandlers();
1642 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1643 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1646 * Do the alias resolution
1649 nalias = xmlGetEncodingAlias(name);
1654 * Check first for directly registered encoding names
1656 for (i = 0;i < 99;i++) {
1657 upper[i] = toupper(name[i]);
1658 if (upper[i] == 0) break;
1662 if (handlers != NULL) {
1663 for (i = 0;i < nbCharEncodingHandler; i++) {
1664 if (!strcmp(upper, handlers[i]->name)) {
1665 #ifdef DEBUG_ENCODING
1666 xmlGenericError(xmlGenericErrorContext,
1667 "Found registered handler for encoding %s\n", name);
1669 return(handlers[i]);
1674 #ifdef LIBXML_ICONV_ENABLED
1675 /* check whether iconv can handle this */
1676 icv_in = iconv_open("UTF-8", name);
1677 icv_out = iconv_open(name, "UTF-8");
1678 if (icv_in == (iconv_t) -1) {
1679 icv_in = iconv_open("UTF-8", upper);
1681 if (icv_out == (iconv_t) -1) {
1682 icv_out = iconv_open(upper, "UTF-8");
1684 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1685 enc = (xmlCharEncodingHandlerPtr)
1686 xmlMalloc(sizeof(xmlCharEncodingHandler));
1688 iconv_close(icv_in);
1689 iconv_close(icv_out);
1692 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1693 enc->name = xmlMemStrdup(name);
1696 enc->iconv_in = icv_in;
1697 enc->iconv_out = icv_out;
1698 #ifdef DEBUG_ENCODING
1699 xmlGenericError(xmlGenericErrorContext,
1700 "Found iconv handler for encoding %s\n", name);
1703 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1704 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1705 "iconv : problems with filters for '%s'\n", name);
1707 #endif /* LIBXML_ICONV_ENABLED */
1708 #ifdef LIBXML_ICU_ENABLED
1709 /* check whether icu can handle this */
1710 ucv_in = openIcuConverter(name, 1);
1711 ucv_out = openIcuConverter(name, 0);
1712 if (ucv_in != NULL && ucv_out != NULL) {
1713 encu = (xmlCharEncodingHandlerPtr)
1714 xmlMalloc(sizeof(xmlCharEncodingHandler));
1716 closeIcuConverter(ucv_in);
1717 closeIcuConverter(ucv_out);
1720 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1721 encu->name = xmlMemStrdup(name);
1723 encu->output = NULL;
1724 encu->uconv_in = ucv_in;
1725 encu->uconv_out = ucv_out;
1726 #ifdef DEBUG_ENCODING
1727 xmlGenericError(xmlGenericErrorContext,
1728 "Found ICU converter handler for encoding %s\n", name);
1731 } else if (ucv_in != NULL || ucv_out != NULL) {
1732 closeIcuConverter(ucv_in);
1733 closeIcuConverter(ucv_out);
1734 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1735 "ICU converter : problems with filters for '%s'\n", name);
1737 #endif /* LIBXML_ICU_ENABLED */
1739 #ifdef DEBUG_ENCODING
1740 xmlGenericError(xmlGenericErrorContext,
1741 "No handler found for encoding %s\n", name);
1745 * Fallback using the canonical names
1747 alias = xmlParseCharEncoding(norig);
1748 if (alias != XML_CHAR_ENCODING_ERROR) {
1750 canon = xmlGetCharEncodingName(alias);
1751 if ((canon != NULL) && (strcmp(name, canon))) {
1752 return(xmlFindCharEncodingHandler(canon));
1756 /* If "none of the above", give up */
1760 /************************************************************************
1762 * ICONV based generic conversion functions *
1764 ************************************************************************/
1766 #ifdef LIBXML_ICONV_ENABLED
1769 * @cd: iconv converter data structure
1770 * @out: a pointer to an array of bytes to store the result
1771 * @outlen: the length of @out
1772 * @in: a pointer to an array of ISO Latin 1 chars
1773 * @inlen: the length of @in
1775 * Returns 0 if success, or
1776 * -1 by lack of space, or
1777 * -2 if the transcoding fails (for *in is not valid utf8 string or
1778 * the result of transformation can't fit into the encoding we want), or
1779 * -3 if there the last byte can't form a single output char.
1781 * The value of @inlen after return is the number of octets consumed
1782 * as the return value is positive, else unpredictable.
1783 * The value of @outlen after return is the number of ocetes consumed.
/* Thin adapter between the int-based buffer lengths used here and iconv(). */
1786 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1787 const unsigned char *in, int *inlen) {
/* iconv() works on size_t counters and char pointers, hence the local copies. */
1788 size_t icv_inlen, icv_outlen;
1789 const char *icv_in = (const char *) in;
1790 char *icv_out = (char *) out;
/* Any NULL argument is rejected; *outlen is zeroed when it can be. */
1793 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1794 if (outlen != NULL) *outlen = 0;
1798 icv_outlen = *outlen;
1799 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
/*
 * iconv() leaves the remaining byte counts in the counters; subtracting them
 * turns *inlen and *outlen into consumed/produced counts (assuming icv_inlen
 * was seeded from *inlen on a line not visible here).
 */
1800 *inlen -= icv_inlen;
1801 *outlen -= icv_outlen;
/* Leftover input or an iconv() failure: inspect errno to classify the problem. */
1802 if ((icv_inlen != 0) || (ret == -1)) {
/* EILSEQ: invalid input sequence. */
1804 if (errno == EILSEQ) {
/* E2BIG: output buffer too small. */
1809 if (errno == E2BIG) {
/* EINVAL: incomplete sequence at the end of the input. */
1814 if (errno == EINVAL) {
1824 #endif /* LIBXML_ICONV_ENABLED */
1826 /************************************************************************
1828 * ICU based generic conversion functions *
1830 ************************************************************************/
1832 #ifdef LIBXML_ICU_ENABLED
1835 * @cd: ICU uconverter data structure
1836 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1837 * @out: a pointer to an array of bytes to store the result
1838 * @outlen: the length of @out
1839 * @in: a pointer to an array of ISO Latin 1 chars
1840 * @inlen: the length of @in
1842 * Returns 0 if success, or
1843 * -1 by lack of space, or
1844 * -2 if the transcoding fails (for *in is not valid utf8 string or
1845 * the result of transformation can't fit into the encoding we want), or
1846 * -3 if there the last byte can't form a single output char.
1848 * The value of @inlen after return is the number of octets consumed
1849 * as the return value is positive, else unpredictable.
1850 * The value of @outlen after return is the number of ocetes consumed.
/* ICU counterpart of the iconv wrapper: one ucnv_convertEx() call per invocation. */
1853 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1854 const unsigned char *in, int *inlen) {
/* Cursor pointers advanced by ucnv_convertEx(); err collects the ICU status. */
1855 const char *ucv_in = (const char *) in;
1856 char *ucv_out = (char *) out;
1857 UErrorCode err = U_ZERO_ERROR;
/* Any NULL argument is rejected; *outlen is zeroed when it can be. */
1859 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1860 if (outlen != NULL) *outlen = 0;
1866 * 1. is ucnv_convert(To|From)Algorithmic better?
1867 * 2. had we better use an explicit pivot buffer?
1868 * 3. error returned comes from 'fromUnicode' only even
1869 * when toUnicode is true !
/* The two calls differ only in which converter is the target and which the source. */
1872 /* encoding => UTF-16 => UTF-8 */
1873 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1874 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1877 /* UTF-8 => UTF-16 => encoding */
1878 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1879 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
/* Report how far each cursor advanced from the start of its buffer. */
1882 *inlen = ucv_in - (const char*) in;
1883 *outlen = ucv_out - (char *) out;
/* ICU status codes are then translated; the returned values are not visible here. */
1886 if (err == U_BUFFER_OVERFLOW_ERROR)
1888 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1890 /* if (err == U_TRUNCATED_CHAR_FOUND) */
1893 #endif /* LIBXML_ICU_ENABLED */
1895 /************************************************************************
1897 * The real API used by libxml for on-the-fly conversion *
1899 ************************************************************************/
/* Forward declaration; the definition follows below. */
1901 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1902 xmlBufferPtr in, int len);
1905 * xmlCharEncFirstLineInt:
1906 * @handler: char enconding transformation data structure
1907 * @out: an xmlBuffer for the output.
1908 * @in: an xmlBuffer for the input
1909 * @len: number of bytes to convert for the first line, or -1
1911 * Front-end for the encoding handler input function, but handle only
1912 * the very first line, i.e. limit itself to 45 chars.
1914 * Returns the number of byte written if success, or
1916 * -2 if the transcoding fails (for *in is not valid utf8 string or
1917 * the result of transformation can't fit into the encoding we want), or
1920 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1921 xmlBufferPtr in, int len) {
/* All three arguments are mandatory. */
1926 if (handler == NULL) return(-1);
1927 if (out == NULL) return(-1);
1928 if (in == NULL) return(-1);
1930 /* calculate space available */
1931 written = out->size - out->use - 1; /* count '\0' */
1934 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1935 * 45 chars should be sufficient to reach the end of the encoding
1936 * declaration without going too far inside the document content.
1937 * on UTF-16 this means 90bytes, on UCS4 this means 180
1938 * The actual value depending on guessed encoding is passed as @len
/*
 * The buffer is grown when twice the input size would not fit.
 * NOTE(review): the growth request is toconv, not toconv * 2 - confirm intended.
 */
1948 if (toconv * 2 >= written) {
1949 xmlBufferGrow(out, toconv);
1950 written = out->size - out->use - 1;
/*
 * Converter selection: the handler's own input function first, then iconv,
 * then ICU. Each branch drops the consumed bytes from @in, advances
 * out->use and keeps @out NUL-terminated.
 */
1953 if (handler->input != NULL) {
1954 ret = handler->input(&out->content[out->use], &written,
1955 in->content, &toconv);
1956 xmlBufferShrink(in, toconv);
1957 out->use += written;
1958 out->content[out->use] = 0;
1960 #ifdef LIBXML_ICONV_ENABLED
1961 else if (handler->iconv_in != NULL) {
1962 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1963 &written, in->content, &toconv);
1964 xmlBufferShrink(in, toconv);
1965 out->use += written;
1966 out->content[out->use] = 0;
/* A -1 from the wrapper is remapped to -3 here. */
1967 if (ret == -1) ret = -3;
1969 #endif /* LIBXML_ICONV_ENABLED */
1970 #ifdef LIBXML_ICU_ENABLED
1971 else if (handler->uconv_in != NULL) {
1972 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
1973 &written, in->content, &toconv);
1974 xmlBufferShrink(in, toconv);
1975 out->use += written;
1976 out->content[out->use] = 0;
/* Same remapping as in the iconv branch. */
1977 if (ret == -1) ret = -3;
1979 #endif /* LIBXML_ICU_ENABLED */
/* Optional tracing of the conversion outcome. */
1980 #ifdef DEBUG_ENCODING
1983 xmlGenericError(xmlGenericErrorContext,
1984 "converted %d bytes to %d bytes of input\n",
1988 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1989 toconv, written, in->use);
1992 xmlGenericError(xmlGenericErrorContext,
1993 "input conversion failed due to input error\n");
1996 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1997 toconv, written, in->use);
2000 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2002 #endif /* DEBUG_ENCODING */
2004 * Ignore when input buffer is not on a boundary
/* Both -3 and -1 are turned into 0 before returning. */
2006 if (ret == -3) ret = 0;
2007 if (ret == -1) ret = 0;
2012 * xmlCharEncFirstLine:
2013 * @handler: char enconding transformation data structure
2014 * @out: an xmlBuffer for the output.
2015 * @in: an xmlBuffer for the input
2017 * Front-end for the encoding handler input function, but handle only
2018 * the very first line, i.e. limit itself to 45 chars.
2020 * Returns the number of byte written if success, or
2022 * -2 if the transcoding fails (for *in is not valid utf8 string or
2023 * the result of transformation can't fit into the encoding we want), or
/* Public entry point: delegates to xmlCharEncFirstLineInt() with a @len of -1. */
2026 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2028 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2033 * @handler: char encoding transformation data structure
2034 * @out: an xmlBuffer for the output.
2035 * @in: an xmlBuffer for the input
2037 * Generic front-end for the encoding handler input function
2039 * Returns the number of byte written if success, or
2041 * -2 if the transcoding fails (for *in is not valid utf8 string or
2042 * the result of transformation can't fit into the encoding we want), or
/* Converts pending bytes of @in and appends the result to @out. */
2045 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2052 if (handler == NULL)
/* Free room in @out, keeping one byte for the terminating NUL. */
2062 written = out->size - out->use -1; /* count '\0' */
/* Grow @out when twice the input size would not fit. */
2063 if (toconv * 2 >= written) {
2064 xmlBufferGrow(out, out->size + toconv * 2);
2065 written = out->size - out->use - 1;
/*
 * Converter selection: the handler's own input function first, then iconv,
 * then ICU. Each branch drops the consumed bytes from @in, advances
 * out->use and keeps @out NUL-terminated.
 */
2067 if (handler->input != NULL) {
2068 ret = handler->input(&out->content[out->use], &written,
2069 in->content, &toconv);
2070 xmlBufferShrink(in, toconv);
2071 out->use += written;
2072 out->content[out->use] = 0;
2074 #ifdef LIBXML_ICONV_ENABLED
2075 else if (handler->iconv_in != NULL) {
2076 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2077 &written, in->content, &toconv);
2078 xmlBufferShrink(in, toconv);
2079 out->use += written;
2080 out->content[out->use] = 0;
2084 #endif /* LIBXML_ICONV_ENABLED */
2085 #ifdef LIBXML_ICU_ENABLED
2086 else if (handler->uconv_in != NULL) {
2087 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
2088 &written, in->content, &toconv);
2089 xmlBufferShrink(in, toconv);
2090 out->use += written;
2091 out->content[out->use] = 0;
2095 #endif /* LIBXML_ICU_ENABLED */
/* Optional tracing of the conversion outcome. */
2098 #ifdef DEBUG_ENCODING
2099 xmlGenericError(xmlGenericErrorContext,
2100 "converted %d bytes to %d bytes of input\n",
2105 #ifdef DEBUG_ENCODING
2106 xmlGenericError(xmlGenericErrorContext,
2107 "converted %d bytes to %d bytes of input, %d left\n",
2108 toconv, written, in->use);
2112 #ifdef DEBUG_ENCODING
2113 xmlGenericError(xmlGenericErrorContext,
2114 "converted %d bytes to %d bytes of input, %d left\n",
2115 toconv, written, in->use);
/*
 * On an input error the first four pending bytes are formatted into the
 * error message.
 * NOTE(review): in->content[0..3] is read without a visible check that four
 * bytes are pending - confirm the buffer guarantees this.
 */
2121 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2122 in->content[0], in->content[1],
2123 in->content[2], in->content[3]);
2125 xmlEncodingErr(XML_I18N_CONV_FAILED,
2126 "input conversion failed due to input error, bytes %s\n",
2131 * Ignore when input buffer is not on a boundary
/* A non-zero byte count takes precedence over the converter's return code. */
2135 return (written? written : ret);
2139 * xmlCharEncOutFunc:
2140 * @handler: char enconding transformation data structure
2141 * @out: an xmlBuffer for the output.
2142 * @in: an xmlBuffer for the input
2144 * Generic front-end for the encoding handler output function
2145 * a first call with @in == NULL has to be made firs to initiate the
2146 * output in case of non-stateless encoding needing to initiate their
2147 * state or the output (like the BOM in UTF16).
2148 * In case of UTF8 sequence conversion errors for the given encoder,
2149 * the content will be automatically remapped to a CharRef sequence.
2151 * Returns the number of byte written if success, or
2153 * -2 if the transcoding fails (for *in is not valid utf8 string or
2154 * the result of transformation can't fit into the encoding we want), or
2157 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
/* Length of the character reference inserted by the last recovery step, 0 if none. */
2164 int charref_len = 0;
2166 if (handler == NULL) return(-1);
2167 if (out == NULL) return(-1);
/* Free room in @out, minus one byte for the terminating NUL. */
2171 written = out->size - out->use;
2174 written--; /* Gennady: count '/0' */
2177 * First specific handling of in = NULL, i.e. the initialization call
/* In the initialization call the converters are invoked without input data. */
2181 if (handler->output != NULL) {
2182 ret = handler->output(&out->content[out->use], &written,
2184 if (ret >= 0) { /* Gennady: check return value */
2185 out->use += written;
2186 out->content[out->use] = 0;
2189 #ifdef LIBXML_ICONV_ENABLED
2190 else if (handler->iconv_out != NULL) {
2191 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2192 &written, NULL, &toconv);
2193 out->use += written;
2194 out->content[out->use] = 0;
2196 #endif /* LIBXML_ICONV_ENABLED */
2197 #ifdef LIBXML_ICU_ENABLED
2198 else if (handler->uconv_out != NULL) {
2199 ret = xmlUconvWrapper(handler->uconv_out, 0,
2200 &out->content[out->use],
2201 &written, NULL, &toconv);
2202 out->use += written;
2203 out->content[out->use] = 0;
2205 #endif /* LIBXML_ICU_ENABLED */
2206 #ifdef DEBUG_ENCODING
2207 xmlGenericError(xmlGenericErrorContext,
2208 "initialized encoder\n");
2214 * Conversion itself.
/* Make room for up to four output bytes per input byte. */
2219 if (toconv * 4 >= written) {
2220 xmlBufferGrow(out, toconv * 4);
2221 written = out->size - out->use - 1;
/*
 * Converter selection: the handler's own output function first, then iconv,
 * then ICU. Each branch drops the consumed bytes from @in, advances
 * out->use, accumulates writtentot and keeps @out NUL-terminated.
 */
2223 if (handler->output != NULL) {
2224 ret = handler->output(&out->content[out->use], &written,
2225 in->content, &toconv);
2227 xmlBufferShrink(in, toconv);
2228 out->use += written;
2229 writtentot += written;
2231 out->content[out->use] = 0;
2233 #ifdef LIBXML_ICONV_ENABLED
2234 else if (handler->iconv_out != NULL) {
2235 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2236 &written, in->content, &toconv);
2237 xmlBufferShrink(in, toconv);
2238 out->use += written;
2239 writtentot += written;
2240 out->content[out->use] = 0;
2244 * Can be a limitation of iconv
2252 #endif /* LIBXML_ICONV_ENABLED */
2253 #ifdef LIBXML_ICU_ENABLED
2254 else if (handler->uconv_out != NULL) {
2255 ret = xmlUconvWrapper(handler->uconv_out, 0,
2256 &out->content[out->use],
2257 &written, in->content, &toconv);
2258 xmlBufferShrink(in, toconv);
2259 out->use += written;
2260 writtentot += written;
2261 out->content[out->use] = 0;
2265 * Can be a limitation of iconv
2273 #endif /* LIBXML_ICU_ENABLED */
/* The handler provides no way to produce output. */
2275 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2276 "xmlCharEncOutFunc: no output function !\n", NULL);
2280 if (ret >= 0) output += ret;
2283 * Attempt to handle error cases
2287 #ifdef DEBUG_ENCODING
2288 xmlGenericError(xmlGenericErrorContext,
2289 "converted %d bytes to %d bytes of output\n",
2294 #ifdef DEBUG_ENCODING
2295 xmlGenericError(xmlGenericErrorContext,
2296 "output conversion failed by lack of space\n");
2300 #ifdef DEBUG_ENCODING
2301 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2302 toconv, written, in->use);
/* Recovery path: decode the UTF-8 character at the head of the pending input. */
2307 const xmlChar *utf = (const xmlChar *) in->content;
2310 cur = xmlGetUTF8Char(utf, &len);
2311 if ((charref_len != 0) && (written < charref_len)) {
2313 * We attempted to insert a character reference and failed.
2314 * Undo what was written and skip the remaining charref.
/* Roll back the partial output, then drop the unconsumed rest of the charref from @in. */
2316 out->use -= written;
2317 writtentot -= written;
2318 xmlBufferShrink(in, charref_len - written);
2323 } else if (cur > 0) {
/* Scratch buffer for the textual character reference. */
2324 xmlChar charref[20];
2326 #ifdef DEBUG_ENCODING
2327 xmlGenericError(xmlGenericErrorContext,
2328 "handling output conversion error\n");
2329 xmlGenericError(xmlGenericErrorContext,
2330 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2331 in->content[0], in->content[1],
2332 in->content[2], in->content[3]);
2335 * Removes the UTF8 sequence, and replace it by a charref
2336 * and continue the transcoding phase, hoping the error
2337 * did not mangle the encoder state.
/* charref_len records the snprintf() result for the check at the top of this path. */
2339 charref_len = snprintf((char *) &charref[0], sizeof(charref),
/* Swap the len-byte UTF-8 sequence for the reference text at the head of @in. */
2341 xmlBufferShrink(in, len);
2342 xmlBufferAddHead(in, charref, -1);
/* Remaining case: report the first four pending bytes in the error message. */
2348 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2349 in->content[0], in->content[1],
2350 in->content[2], in->content[3]);
2352 xmlEncodingErr(XML_I18N_CONV_FAILED,
2353 "output conversion failed due to conv error, bytes %s\n",
/* The first pending byte is overwritten with a space unless the buffer is immutable. */
2355 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2356 in->content[0] = ' ';
2365 * xmlCharEncCloseFunc:
2366 * @handler: char enconding transformation data structure
2368 * Generic front-end for encoding handler close function
2370 * Returns 0 if success, or -1 in case of error
2373 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
/* A handler without a name is rejected just like a NULL handler. */
2376 if (handler == NULL) return(-1);
2377 if (handler->name == NULL) return(-1);
2378 #ifdef LIBXML_ICONV_ENABLED
2380 * Iconv handlers can be used only once, free the whole block.
2381 * and the associated icon resources.
/* Only handlers carrying at least one iconv descriptor are treated here. */
2383 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
/* Each descriptor is closed and its field reset; iconv_close()'s result is tested. */
2385 if (handler->iconv_out != NULL) {
2386 if (iconv_close(handler->iconv_out))
2388 handler->iconv_out = NULL;
2390 if (handler->iconv_in != NULL) {
2391 if (iconv_close(handler->iconv_in))
2393 handler->iconv_in = NULL;
2396 #endif /* LIBXML_ICONV_ENABLED */
2397 #ifdef LIBXML_ICU_ENABLED
/* Same treatment for ICU-backed handlers. */
2398 if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2400 if (handler->uconv_out != NULL) {
2401 closeIcuConverter(handler->uconv_out);
2402 handler->uconv_out = NULL;
2404 if (handler->uconv_in != NULL) {
2405 closeIcuConverter(handler->uconv_in);
2406 handler->uconv_in = NULL;
2411 /* free up only dynamic handlers iconv/uconv */
2412 if (handler->name != NULL)
2413 xmlFree(handler->name);
2414 handler->name = NULL;
/* Optional tracing of the outcome. */
2417 #ifdef DEBUG_ENCODING
2419 xmlGenericError(xmlGenericErrorContext,
2420 "failed to close the encoding handler\n");
2422 xmlGenericError(xmlGenericErrorContext,
2423 "closed the encoding handler\n");
2431 * @ctxt: an XML parser context
2433 * This function provides the current index of the parser relative
2434 * to the start of the current entity. This function is computed in
2435 * bytes from the beginning starting at zero and finishing at the
2436 * size in byte of the file if parsing a file. The function is
2437 * of constant cost if the input is UTF-8 but can be costly if run
2438 * on non-UTF-8 input.
2440 * Returns the index in bytes from the beginning of the entity or -1
2441 * in case the index could not be computed.
2444 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2445 xmlParserInputPtr in;
2447 if (ctxt == NULL) return(-1);
2449 if (in == NULL) return(-1);
/* An encoder on the input buffer means the parser sees converted data. */
2450 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
/* unused: count of original-encoding bytes for the input not yet parsed. */
2451 unsigned int unused = 0;
2452 xmlCharEncodingHandler * handler = in->buf->encoder;
2454 * Encoding conversion, compute the number of unused original
2455 * bytes from the input not consumed and substract that from
2456 * the raw consumed value, this is not a cheap operation
2458 if (in->end - in->cur > 0) {
/* Scratch output area; the converted bytes are only produced to be measured. */
2459 unsigned char convbuf[32000];
2460 const unsigned char *cur = (const unsigned char *)in->cur;
2461 int toconv = in->end - in->cur, written = 32000;
/*
 * The unparsed tail is run back through the output converter (own function,
 * iconv or ICU); each loop repeats while the converter returns -2.
 */
2465 if (handler->output != NULL) {
2467 toconv = in->end - cur;
2469 ret = handler->output(&convbuf[0], &written,
2471 if (ret == -1) return(-1);
2474 } while (ret == -2);
2475 #ifdef LIBXML_ICONV_ENABLED
2476 } else if (handler->iconv_out != NULL) {
2478 toconv = in->end - cur;
2480 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2481 &written, cur, &toconv);
2490 } while (ret == -2);
2492 #ifdef LIBXML_ICU_ENABLED
2493 } else if (handler->uconv_out != NULL) {
2495 toconv = in->end - cur;
2497 ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
2498 &written, cur, &toconv);
2507 } while (ret == -2);
2510 /* could not find a converter */
/* Guard against a raw count smaller than the computed unused amount. */
2514 if (in->buf->rawconsumed < unused)
2516 return(in->buf->rawconsumed - unused);
/* No encoder: bytes consumed so far plus the offset inside the current buffer. */
2518 return(in->consumed + (in->cur - in->base));
2521 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2522 #ifdef LIBXML_ISO8859X_ENABLED
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every input character produces exactly one output
 * byte; conversion stops early, without error, once @out is full.
 *
 * Returns the number of bytes written on success, 0 for the initialization
 *     call (@in == NULL), -2 if the transcoding fails (invalid UTF-8 or a
 *     character outside the target set), -3 if the input ends in the middle
 *     of a sequence, or -1 on a NULL argument.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
               const unsigned char* in, int *inlen,
               unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* The output limit was previously ignored, allowing writes past @out. */
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d = *in++;
        if (d < 0x80)  {
            if (out >= outend)
                break;
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-3);
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* first level selects a 64-entry block, second level the byte */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            if (out >= outend)
                break;
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-3);
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            if (out >= outend)
                break;
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        }
        /* only count a character as consumed once its byte is stored */
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);
}
/**
 * ISO8859xToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  128-entry table mapping bytes 0x80..0xFF to code points,
 *                 0 marking an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out.
 *
 * Returns the number of bytes written, or -1 on a NULL argument or when an
 *     undefined code point is met.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
               const unsigned char* in, int *inlen,
               unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop runs while at least 3 output bytes are free, the largest
     * sequence a table entry can need. The test is written on the
     * difference so that no pointer before @out is ever formed.
     */
    while ((in < inend) && ((outend - out) > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* ASCII run: copy at most as many bytes as the output can take */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bound check first: *in must not be read once in reaches instop */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two trailing ASCII bytes still fit in the last free slots. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
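/************************************************************************
 *   Lookup tables for ISO-8859-2..ISO-8859-16 transcoding              *
 *                                                                      *
 *   Each character set N comes with two tables:                        *
 *   - xmlunicodetable_ISO8859_N [128]: indexed by (byte - 0x80), it    *
 *     gives the Unicode code point that ISO8859xToUTF8 encodes for     *
 *     that byte; 0x0000 marks a byte with no mapping.                  *
 *   - xmltranscodetable_ISO8859_N [48 + K * 64]: the reverse table     *
 *     read by UTF8ToISO8859x: a 48-byte index followed by K blocks     *
 *     of 64 bytes, chained as xlattable [48 + c + index * 64].         *
 ************************************************************************/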
2714 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2715 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2716 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2717 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2718 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2719 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2720 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2721 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2722 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2723 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2724 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2725 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2726 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2727 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2728 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2729 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2730 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2733 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2734 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2735 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2736 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2737 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2738 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2739 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2740 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2741 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2742 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2743 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2744 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2745 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2746 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2747 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2749 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2750 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2751 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2753 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2754 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2755 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2756 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2757 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2758 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2759 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2760 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2763 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2764 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2765 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2766 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2767 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2768 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2769 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2770 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2771 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2772 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2773 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2774 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2775 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2776 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2777 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2778 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2779 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2782 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2783 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2784 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2785 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2786 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2787 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2788 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2789 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2790 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2791 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2792 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2793 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2794 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2795 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2796 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2797 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2798 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2799 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2800 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2801 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2802 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2803 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2804 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2805 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2806 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2807 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2808 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2809 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2810 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2811 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2812 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2813 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2816 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2817 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2818 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2819 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2820 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2821 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2822 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2823 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2824 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2825 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2826 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2827 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2828 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2829 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2830 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2831 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2832 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2835 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2836 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2837 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2838 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2839 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2840 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2841 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2842 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2843 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2844 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2845 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2846 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2847 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2848 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2849 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2850 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2851 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2852 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2853 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2854 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2855 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2856 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2857 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2858 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2859 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2860 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2861 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2862 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2865 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2866 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2867 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2868 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2869 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2870 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2871 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2872 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2873 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2874 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2875 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2876 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2877 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2878 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2879 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2880 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2881 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2884 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2885 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2886 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2887 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2888 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2890 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2891 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2892 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2893 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2894 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2895 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2896 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2897 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2898 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2899 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2900 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2901 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2902 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2903 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2904 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2905 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2906 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2907 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2909 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2910 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2911 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2914 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2915 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2916 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2917 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2918 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2919 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2920 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2921 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2922 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2923 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2924 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2925 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2926 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2927 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2928 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2929 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2930 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2933 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2934 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2935 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2936 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2937 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2938 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2939 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2940 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2941 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2942 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2943 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2944 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2945 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2946 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2947 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2948 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2949 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2950 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2951 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2952 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2953 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2954 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2955 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2956 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2959 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2960 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2961 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2962 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2963 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2964 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2965 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2966 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2967 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2968 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2969 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2970 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2971 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2972 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2973 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2974 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2975 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2978 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2979 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2980 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2981 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2982 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2983 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2984 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2985 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2987 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2988 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2989 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2990 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2991 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2993 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2994 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2995 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2996 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3001 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3002 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3003 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3004 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3005 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3006 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3007 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3009 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3013 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3014 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3015 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3016 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3017 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3018 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3019 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3020 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3021 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3022 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3023 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3024 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3025 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3026 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3027 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3028 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3031 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3032 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3033 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3034 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3035 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3036 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3037 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3038 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3039 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3040 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3041 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3042 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3043 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3044 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3045 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3047 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3049 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3051 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3052 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3053 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3054 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3055 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3056 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3057 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3061 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3062 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3065 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3066 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3067 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3068 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3069 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3070 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3071 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3072 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3073 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3074 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3075 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3076 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3077 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3078 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3079 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3080 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3081 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3084 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3085 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3086 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3087 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3088 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3089 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3090 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3091 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3092 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3093 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3094 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3095 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3096 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3097 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3098 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3099 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3100 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3101 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3102 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3103 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3104 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3105 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3106 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3107 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3111 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3112 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3113 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3114 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3115 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3116 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3117 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3118 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3119 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3120 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3121 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3122 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3123 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3124 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3125 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3126 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3129 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3130 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3131 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3132 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3133 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3134 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3135 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3136 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3137 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3138 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3139 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3140 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3141 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3142 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3143 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3144 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3145 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3146 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3147 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3148 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3149 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3150 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3151 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3153 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3154 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3155 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3158 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3159 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3160 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
/*
 * xmlunicodetable_ISO8859_11: 128-entry table of 16-bit values handed to
 * ISO8859xToUTF8 by ISO8859_11ToUTF8. Presumably indexed by (byte - 0x80)
 * and holding the Unicode code point for that byte, with 0x0000 marking
 * bytes that have no mapping -- confirm against ISO8859xToUTF8.
 */
3163 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3164 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3165 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3166 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3167 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3168 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3169 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3170 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3171 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3172 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3173 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3174 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3175 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3176 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3177 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3178 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3179 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
/*
 * xmltranscodetable_ISO8859_11: byte table of size 48 + 6 * 64 handed to
 * UTF8ToISO8859x by UTF8ToISO8859_11. The size expression suggests a
 * 48-byte index header followed by 6 blocks of 64 entries; the actual
 * lookup scheme lives in UTF8ToISO8859x (not visible here) -- TODO confirm.
 * The concatenated string literal supplies exactly 27 * 16 = 432 bytes, so
 * it fills the array with no room for a terminating NUL.
 */
3182 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3183 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3187 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3191 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3192 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3196 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3198 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3199 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3200 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3201 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3202 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3204 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3207 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3208 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_13: 128-entry table of 16-bit values handed to
 * ISO8859xToUTF8 by ISO8859_13ToUTF8. Presumably indexed by (byte - 0x80)
 * and holding the Unicode code point for that byte -- confirm against
 * ISO8859xToUTF8.
 */
3212 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3213 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3214 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3215 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3216 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3217 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3218 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3219 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3220 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3221 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3222 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3223 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3224 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3225 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3226 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3227 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3228 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
/*
 * xmltranscodetable_ISO8859_13: byte table of size 48 + 7 * 64 handed to
 * UTF8ToISO8859x by UTF8ToISO8859_13. The size expression suggests a
 * 48-byte index header followed by 7 blocks of 64 entries; the actual
 * lookup scheme lives in UTF8ToISO8859x (not visible here) -- TODO confirm.
 * The concatenated string literal supplies exactly 31 * 16 = 496 bytes, so
 * it fills the array with no room for a terminating NUL.
 */
3231 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3232 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3235 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3236 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3240 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3241 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3242 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3243 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3248 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3249 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3250 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3251 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3252 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3253 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3254 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3255 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3256 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3257 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3258 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3259 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3260 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3261 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3262 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_14: 128-entry table of 16-bit values handed to
 * ISO8859xToUTF8 by ISO8859_14ToUTF8. Presumably indexed by (byte - 0x80)
 * and holding the Unicode code point for that byte -- confirm against
 * ISO8859xToUTF8.
 */
3265 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3266 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3267 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3268 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3269 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3270 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3271 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3272 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3273 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3274 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3275 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3276 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3277 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3278 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3279 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3280 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3281 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
/*
 * xmltranscodetable_ISO8859_14: byte table of size 48 + 10 * 64 handed to
 * UTF8ToISO8859x by UTF8ToISO8859_14. The size expression suggests a
 * 48-byte index header followed by 10 blocks of 64 entries; the actual
 * lookup scheme lives in UTF8ToISO8859x (not visible here) -- TODO confirm.
 * The concatenated string literal supplies exactly 43 * 16 = 688 bytes, so
 * it fills the array with no room for a terminating NUL.
 */
3284 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3285 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3286 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3287 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3293 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3294 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3295 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3300 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3301 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3302 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3303 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3305 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3320 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3322 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3323 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3325 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3326 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3327 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
/*
 * xmlunicodetable_ISO8859_15: 128-entry table of 16-bit values handed to
 * ISO8859xToUTF8 by ISO8859_15ToUTF8. Presumably indexed by (byte - 0x80)
 * and holding the Unicode code point for that byte -- confirm against
 * ISO8859xToUTF8. Most entries equal their own index + 0x80; the visible
 * exceptions are 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152, 0x0153
 * and 0x0178.
 */
3330 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3331 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3332 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3333 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3334 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3335 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3336 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3337 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3338 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3339 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3340 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3341 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3342 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3343 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3344 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3345 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3346 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
/*
 * xmltranscodetable_ISO8859_15: byte table of size 48 + 6 * 64 handed to
 * UTF8ToISO8859x by UTF8ToISO8859_15. The size expression suggests a
 * 48-byte index header followed by 6 blocks of 64 entries; the actual
 * lookup scheme lives in UTF8ToISO8859x (not visible here) -- TODO confirm.
 * The concatenated string literal supplies exactly 27 * 16 = 432 bytes, so
 * it fills the array with no room for a terminating NUL.
 */
3349 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3350 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3353 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3354 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3356 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3358 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3359 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3360 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3361 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3368 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3373 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3374 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3375 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3376 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
/*
 * xmlunicodetable_ISO8859_16: 128-entry table of 16-bit values handed to
 * ISO8859xToUTF8 by ISO8859_16ToUTF8. Presumably indexed by (byte - 0x80)
 * and holding the Unicode code point for that byte -- confirm against
 * ISO8859xToUTF8.
 */
3379 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3380 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3381 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3382 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3383 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3384 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3385 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3386 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3387 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3388 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3389 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3390 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3391 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3392 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3393 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3394 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3395 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
/*
 * xmltranscodetable_ISO8859_16: byte table of size 48 + 9 * 64 handed to
 * UTF8ToISO8859x by UTF8ToISO8859_16. The size expression suggests a
 * 48-byte index header followed by 9 blocks of 64 entries; the actual
 * lookup scheme lives in UTF8ToISO8859x (not visible here) -- TODO confirm.
 * The concatenated string literal supplies exactly 39 * 16 = 624 bytes, so
 * it fills the array with no room for a terminating NUL.
 */
3398 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3399 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3400 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3405 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3407 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3408 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3409 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3410 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3411 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3416 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3418 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3425 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3428 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3432 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3435 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3436 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3437 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3442 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3445 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3446 const unsigned char* in, int *inlen) {
3447 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3449 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3450 const unsigned char* in, int *inlen) {
3451 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3454 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3455 const unsigned char* in, int *inlen) {
3456 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3458 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3459 const unsigned char* in, int *inlen) {
3460 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3463 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3464 const unsigned char* in, int *inlen) {
3465 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3467 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3468 const unsigned char* in, int *inlen) {
3469 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3472 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3473 const unsigned char* in, int *inlen) {
3474 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3476 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3477 const unsigned char* in, int *inlen) {
3478 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3481 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3482 const unsigned char* in, int *inlen) {
3483 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3485 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3486 const unsigned char* in, int *inlen) {
3487 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3490 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3491 const unsigned char* in, int *inlen) {
3492 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3494 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3495 const unsigned char* in, int *inlen) {
3496 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3499 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3500 const unsigned char* in, int *inlen) {
3501 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3503 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3504 const unsigned char* in, int *inlen) {
3505 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3508 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3509 const unsigned char* in, int *inlen) {
3510 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3512 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3513 const unsigned char* in, int *inlen) {
3514 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3517 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3518 const unsigned char* in, int *inlen) {
3519 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3521 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3522 const unsigned char* in, int *inlen) {
3523 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3526 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3527 const unsigned char* in, int *inlen) {
3528 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3530 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3531 const unsigned char* in, int *inlen) {
3532 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3535 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3536 const unsigned char* in, int *inlen) {
3537 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3539 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3540 const unsigned char* in, int *inlen) {
3541 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3544 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3545 const unsigned char* in, int *inlen) {
3546 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3548 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3549 const unsigned char* in, int *inlen) {
3550 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3553 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3554 const unsigned char* in, int *inlen) {
3555 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3557 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3558 const unsigned char* in, int *inlen) {
3559 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3562 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3563 const unsigned char* in, int *inlen) {
3564 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3566 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3567 const unsigned char* in, int *inlen) {
3568 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3572 xmlRegisterCharEncodingHandlersISO8859x (void) {
3573 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3574 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3575 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3576 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3577 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3578 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3579 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3580 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3581 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3582 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3583 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3584 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3585 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3586 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3592 #define bottom_encoding
3593 #include "elfgcchack.h"