encoding.c

   1 /*
   2  * encoding.c : implements the encoding conversion functions needed for XML
   3  *
   4  * Related specs:
   5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
   6  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
   7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
   8  * [ISO-8859-1]   ISO Latin-1 characters codes.
   9  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
  10  *                Worldwide Character Encoding -- Version 1.0", Addison-
  11  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
  12  *                described in Unicode Technical Report #4.
  13  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
  14  *                Information Interchange, ANSI X3.4-1986.
  15  *
  16  * See Copyright for the status of this software.
  17  *
  18  * daniel@veillard.com
  19  *
  20  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
  21  */
  22
  23 #define IN_LIBXML
  24 #include "libxml.h"
  25
  26 #include <string.h>
  27
  28 #ifdef HAVE_CTYPE_H
  29 #include <ctype.h>
  30 #endif
  31 #ifdef HAVE_STDLIB_H
  32 #include <stdlib.h>
  33 #endif
  34 #ifdef LIBXML_ICONV_ENABLED
  35 #ifdef HAVE_ERRNO_H
  36 #include <errno.h>
  37 #endif
  38 #endif
  39 #include <libxml/encoding.h>
  40 #include <libxml/xmlmemory.h>
  41 #ifdef LIBXML_HTML_ENABLED
  42 #include <libxml/HTMLparser.h>
  43 #endif
  44 #include <libxml/globals.h>
  45 #include <libxml/xmlerror.h>
  46
  47 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
  48 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
  49
  50 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
  51 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
  52 struct _xmlCharEncodingAlias {
  53     const char *name;
  54     const char *alias;
  55 };
  56
  57 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
  58 static int xmlCharEncodingAliasesNb = 0;
  59 static int xmlCharEncodingAliasesMax = 0;
  60
  61 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
  62 #if 0
  63 #define DEBUG_ENCODING  /* Define this to get encoding traces */
  64 #endif
  65 #else
  66 #ifdef LIBXML_ISO8859X_ENABLED
  67 static void xmlRegisterCharEncodingHandlersISO8859x (void);
  68 #endif
  69 #endif
  70
  71 static int xmlLittleEndian = 1;
  72
  73 /**
  74  * xmlEncodingErrMemory:
  75  * @extra:  extra informations
  76  *
  77  * Handle an out of memory condition
  78  */
  79 static void
  80 xmlEncodingErrMemory(const char *extra)
  81 {
  82     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
  83 }
  84
  85 /**
  86  * xmlErrEncoding:
  87  * @error:  the error number
  88  * @msg:  the error message
  89  *
  90  * n encoding error
  91  */
  92 static void
  93 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
  94 {
  95     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
  96                     XML_FROM_I18N, error, XML_ERR_FATAL,
  97                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
  98 }
  99
 100 #ifdef LIBXML_ICU_ENABLED
 101 static uconv_t*
 102 openIcuConverter(const char* name, int toUnicode)
 103 {
 104   UErrorCode status = U_ZERO_ERROR;
 105   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
 106   if (conv == NULL)
 107     return NULL;
 108
 109   conv->uconv = ucnv_open(name, &status);
 110   if (U_FAILURE(status))
 111     goto error;
 112
 113   status = U_ZERO_ERROR;
 114   if (toUnicode) {
 115     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
 116                         NULL, NULL, NULL, &status);
 117   }
 118   else {
 119     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
 120                         NULL, NULL, NULL, &status);
 121   }
 122   if (U_FAILURE(status))
 123     goto error;
 124
 125   status = U_ZERO_ERROR;
 126   conv->utf8 = ucnv_open("UTF-8", &status);
 127   if (U_SUCCESS(status))
 128     return conv;
 129
 130 error:
 131   if (conv->uconv)
 132     ucnv_close(conv->uconv);
 133   xmlFree(conv);
 134   return NULL;
 135 }
 136
 137 static void
 138 closeIcuConverter(uconv_t *conv)
 139 {
 140   if (conv != NULL) {
 141     ucnv_close(conv->uconv);
 142     ucnv_close(conv->utf8);
 143     xmlFree(conv);
 144   }
 145 }
 146 #endif /* LIBXML_ICU_ENABLED */
 147
 148 /************************************************************************
 149  *                                                                      *
 150  *              Conversions To/From UTF8 encoding                       *
 151  *                                                                      *
 152  ************************************************************************/
 153
 154 /**
 155  * asciiToUTF8:
 156  * @out:  a pointer to an array of bytes to store the result
 157  * @outlen:  the length of @out
 158  * @in:  a pointer to an array of ASCII chars
 159  * @inlen:  the length of @in
 160  *
 161  * Take a block of ASCII chars in and try to convert it to an UTF-8
 162  * block of chars out.
 163  * Returns 0 if success, or -1 otherwise
 164  * The value of @inlen after return is the number of octets consumed
 165  *     if the return value is positive, else unpredictable.
 166  * The value of @outlen after return is the number of octets consumed.
 167  */
 168 static int
 169 asciiToUTF8(unsigned char* out, int *outlen,
 170               const unsigned char* in, int *inlen) {
 171     unsigned char* outstart = out;
 172     const unsigned char* base = in;
 173     const unsigned char* processed = in;
 174     unsigned char* outend = out + *outlen;
 175     const unsigned char* inend;
 176     unsigned int c;
 177
 178     inend = in + (*inlen);
 179     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 180         c= *in++;
 181
 182         if (out >= outend)
 183             break;
 184         if (c < 0x80) {
 185             *out++ = c;
 186         } else {
 187             *outlen = out - outstart;
 188             *inlen = processed - base;
 189             return(-1);
 190         }
 191
 192         processed = (const unsigned char*) in;
 193     }
 194     *outlen = out - outstart;
 195     *inlen = processed - base;
 196     return(*outlen);
 197 }
 198
 199 #ifdef LIBXML_OUTPUT_ENABLED
 200 /**
 201  * UTF8Toascii:
 202  * @out:  a pointer to an array of bytes to store the result
 203  * @outlen:  the length of @out
 204  * @in:  a pointer to an array of UTF-8 chars
 205  * @inlen:  the length of @in
 206  *
 207  * Take a block of UTF-8 chars in and try to convert it to an ASCII
 208  * block of chars out.
 209  *
 210  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
 211  * The value of @inlen after return is the number of octets consumed
 212  *     if the return value is positive, else unpredictable.
 213  * The value of @outlen after return is the number of octets consumed.
 214  */
 215 static int
 216 UTF8Toascii(unsigned char* out, int *outlen,
 217               const unsigned char* in, int *inlen) {
 218     const unsigned char* processed = in;
 219     const unsigned char* outend;
 220     const unsigned char* outstart = out;
 221     const unsigned char* instart = in;
 222     const unsigned char* inend;
 223     unsigned int c, d;
 224     int trailing;
 225
 226     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 227     if (in == NULL) {
 228         /*
 229          * initialization nothing to do
 230          */
 231         *outlen = 0;
 232         *inlen = 0;
 233         return(0);
 234     }
 235     inend = in + (*inlen);
 236     outend = out + (*outlen);
 237     while (in < inend) {
 238         d = *in++;
 239         if      (d < 0x80)  { c= d; trailing= 0; }
 240         else if (d < 0xC0) {
 241             /* trailing byte in leading position */
 242             *outlen = out - outstart;
 243             *inlen = processed - instart;
 244             return(-2);
 245         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 246         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 247         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 248         else {
 249             /* no chance for this in Ascii */
 250             *outlen = out - outstart;
 251             *inlen = processed - instart;
 252             return(-2);
 253         }
 254
 255         if (inend - in < trailing) {
 256             break;
 257         }
 258
 259         for ( ; trailing; trailing--) {
 260             if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
 261                 break;
 262             c <<= 6;
 263             c |= d & 0x3F;
 264         }
 265
 266         /* assertion: c is a single UTF-4 value */
 267         if (c < 0x80) {
 268             if (out >= outend)
 269                 break;
 270             *out++ = c;
 271         } else {
 272             /* no chance for this in Ascii */
 273             *outlen = out - outstart;
 274             *inlen = processed - instart;
 275             return(-2);
 276         }
 277         processed = in;
 278     }
 279     *outlen = out - outstart;
 280     *inlen = processed - instart;
 281     return(*outlen);
 282 }
 283 #endif /* LIBXML_OUTPUT_ENABLED */
 284
 285 /**
 286  * isolat1ToUTF8:
 287  * @out:  a pointer to an array of bytes to store the result
 288  * @outlen:  the length of @out
 289  * @in:  a pointer to an array of ISO Latin 1 chars
 290  * @inlen:  the length of @in
 291  *
 292  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 293  * block of chars out.
 294  * Returns the number of bytes written if success, or -1 otherwise
 295  * The value of @inlen after return is the number of octets consumed
 296  *     if the return value is positive, else unpredictable.
 297  * The value of @outlen after return is the number of octets consumed.
 298  */
 299 int
 300 isolat1ToUTF8(unsigned char* out, int *outlen,
 301               const unsigned char* in, int *inlen) {
 302     unsigned char* outstart = out;
 303     const unsigned char* base = in;
 304     unsigned char* outend;
 305     const unsigned char* inend;
 306     const unsigned char* instop;
 307
 308     if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
 309         return(-1);
 310
 311     outend = out + *outlen;
 312     inend = in + (*inlen);
 313     instop = inend;
 314
 315     while ((in < inend) && (out < outend - 1)) {
 316         if (*in >= 0x80) {
 317             *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
 318             *out++ = ((*in) & 0x3F) | 0x80;
 319             ++in;
 320         }
 321         if ((instop - in) > (outend - out)) instop = in + (outend - out);
 322         while ((in < instop) && (*in < 0x80)) {
 323             *out++ = *in++;
 324         }
 325     }
 326     if ((in < inend) && (out < outend) && (*in < 0x80)) {
 327         *out++ = *in++;
 328     }
 329     *outlen = out - outstart;
 330     *inlen = in - base;
 331     return(*outlen);
 332 }
 333
 334 /**
 335  * UTF8ToUTF8:
 336  * @out:  a pointer to an array of bytes to store the result
 337  * @outlen:  the length of @out
 338  * @inb:  a pointer to an array of UTF-8 chars
 339  * @inlenb:  the length of @in in UTF-8 chars
 340  *
 341  * No op copy operation for UTF8 handling.
 342  *
 343  * Returns the number of bytes written, or -1 if lack of space.
 344  *     The value of *inlen after return is the number of octets consumed
 345  *     if the return value is positive, else unpredictable.
 346  */
 347 static int
 348 UTF8ToUTF8(unsigned char* out, int *outlen,
 349            const unsigned char* inb, int *inlenb)
 350 {
 351     int len;
 352
 353     if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
 354         return(-1);
 355     if (*outlen > *inlenb) {
 356         len = *inlenb;
 357     } else {
 358         len = *outlen;
 359     }
 360     if (len < 0)
 361         return(-1);
 362
 363     memcpy(out, inb, len);
 364
 365     *outlen = len;
 366     *inlenb = len;
 367     return(*outlen);
 368 }
 369
 370
 371 #ifdef LIBXML_OUTPUT_ENABLED
 372 /**
 373  * UTF8Toisolat1:
 374  * @out:  a pointer to an array of bytes to store the result
 375  * @outlen:  the length of @out
 376  * @in:  a pointer to an array of UTF-8 chars
 377  * @inlen:  the length of @in
 378  *
 379  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 380  * block of chars out.
 381  *
 382  * Returns the number of bytes written if success, -2 if the transcoding fails,
 383            or -1 otherwise
 384  * The value of @inlen after return is the number of octets consumed
 385  *     if the return value is positive, else unpredictable.
 386  * The value of @outlen after return is the number of octets consumed.
 387  */
 388 int
 389 UTF8Toisolat1(unsigned char* out, int *outlen,
 390               const unsigned char* in, int *inlen) {
 391     const unsigned char* processed = in;
 392     const unsigned char* outend;
 393     const unsigned char* outstart = out;
 394     const unsigned char* instart = in;
 395     const unsigned char* inend;
 396     unsigned int c, d;
 397     int trailing;
 398
 399     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 400     if (in == NULL) {
 401         /*
 402          * initialization nothing to do
 403          */
 404         *outlen = 0;
 405         *inlen = 0;
 406         return(0);
 407     }
 408     inend = in + (*inlen);
 409     outend = out + (*outlen);
 410     while (in < inend) {
 411         d = *in++;
 412         if      (d < 0x80)  { c= d; trailing= 0; }
 413         else if (d < 0xC0) {
 414             /* trailing byte in leading position */
 415             *outlen = out - outstart;
 416             *inlen = processed - instart;
 417             return(-2);
 418         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 419         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 420         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 421         else {
 422             /* no chance for this in IsoLat1 */
 423             *outlen = out - outstart;
 424             *inlen = processed - instart;
 425             return(-2);
 426         }
 427
 428         if (inend - in < trailing) {
 429             break;
 430         }
 431
 432         for ( ; trailing; trailing--) {
 433             if (in >= inend)
 434                 break;
 435             if (((d= *in++) & 0xC0) != 0x80) {
 436                 *outlen = out - outstart;
 437                 *inlen = processed - instart;
 438                 return(-2);
 439             }
 440             c <<= 6;
 441             c |= d & 0x3F;
 442         }
 443
 444         /* assertion: c is a single UTF-4 value */
 445         if (c <= 0xFF) {
 446             if (out >= outend)
 447                 break;
 448             *out++ = c;
 449         } else {
 450             /* no chance for this in IsoLat1 */
 451             *outlen = out - outstart;
 452             *inlen = processed - instart;
 453             return(-2);
 454         }
 455         processed = in;
 456     }
 457     *outlen = out - outstart;
 458     *inlen = processed - instart;
 459     return(*outlen);
 460 }
 461 #endif /* LIBXML_OUTPUT_ENABLED */
 462
 463 /**
 464  * UTF16LEToUTF8:
 465  * @out:  a pointer to an array of bytes to store the result
 466  * @outlen:  the length of @out
 467  * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
 468  * @inlenb:  the length of @in in UTF-16LE chars
 469  *
 470  * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
 471  * block of chars out. This function assumes the endian property
 472  * is the same between the native type of this machine and the
 473  * inputed one.
 474  *
 475  * Returns the number of bytes written, or -1 if lack of space, or -2
 476  *     if the transcoding fails (if *in is not a valid utf16 string)
 477  *     The value of *inlen after return is the number of octets consumed
 478  *     if the return value is positive, else unpredictable.
 479  */
 480 static int
 481 UTF16LEToUTF8(unsigned char* out, int *outlen,
 482             const unsigned char* inb, int *inlenb)
 483 {
 484     unsigned char* outstart = out;
 485     const unsigned char* processed = inb;
 486     unsigned char* outend = out + *outlen;
 487     unsigned short* in = (unsigned short*) inb;
 488     unsigned short* inend;
 489     unsigned int c, d, inlen;
 490     unsigned char *tmp;
 491     int bits;
 492
 493     if ((*inlenb % 2) == 1)
 494         (*inlenb)--;
 495     inlen = *inlenb / 2;
 496     inend = in + inlen;
 497     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 498         if (xmlLittleEndian) {
 499             c= *in++;
 500         } else {
 501             tmp = (unsigned char *) in;
 502             c = *tmp++;
 503             c = c | (((unsigned int)*tmp) << 8);
 504             in++;
 505         }
 506         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
 507             if (in >= inend) {           /* (in > inend) shouldn't happens */
 508                 break;
 509             }
 510             if (xmlLittleEndian) {
 511                 d = *in++;
 512             } else {
 513                 tmp = (unsigned char *) in;
 514                 d = *tmp++;
 515                 d = d | (((unsigned int)*tmp) << 8);
 516                 in++;
 517             }
 518             if ((d & 0xFC00) == 0xDC00) {
 519                 c &= 0x03FF;
 520                 c <<= 10;
 521                 c |= d & 0x03FF;
 522                 c += 0x10000;
 523             }
 524             else {
 525                 *outlen = out - outstart;
 526                 *inlenb = processed - inb;
 527                 return(-2);
 528             }
 529         }
 530
 531         /* assertion: c is a single UTF-4 value */
 532         if (out >= outend)
 533             break;
 534         if      (c <    0x80) {  *out++=  c;                bits= -6; }
 535         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
 536         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
 537         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
 538
 539         for ( ; bits >= 0; bits-= 6) {
 540             if (out >= outend)
 541                 break;
 542             *out++= ((c >> bits) & 0x3F) | 0x80;
 543         }
 544         processed = (const unsigned char*) in;
 545     }
 546     *outlen = out - outstart;
 547     *inlenb = processed - inb;
 548     return(*outlen);
 549 }
 550
 551 #ifdef LIBXML_OUTPUT_ENABLED
 552 /**
 553  * UTF8ToUTF16LE:
 554  * @outb:  a pointer to an array of bytes to store the result
 555  * @outlen:  the length of @outb
 556  * @in:  a pointer to an array of UTF-8 chars
 557  * @inlen:  the length of @in
 558  *
 559  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 560  * block of chars out.
 561  *
 562  * Returns the number of bytes written, or -1 if lack of space, or -2
 563  *     if the transcoding failed.
 564  */
 565 static int
 566 UTF8ToUTF16LE(unsigned char* outb, int *outlen,
 567             const unsigned char* in, int *inlen)
 568 {
 569     unsigned short* out = (unsigned short*) outb;
 570     const unsigned char* processed = in;
 571     const unsigned char *const instart = in;
 572     unsigned short* outstart= out;
 573     unsigned short* outend;
 574     const unsigned char* inend;
 575     unsigned int c, d;
 576     int trailing;
 577     unsigned char *tmp;
 578     unsigned short tmp1, tmp2;
 579
 580     /* UTF16LE encoding has no BOM */
 581     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 582     if (in == NULL) {
 583         *outlen = 0;
 584         *inlen = 0;
 585         return(0);
 586     }
 587     inend= in + *inlen;
 588     outend = out + (*outlen / 2);
 589     while (in < inend) {
 590       d= *in++;
 591       if      (d < 0x80)  { c= d; trailing= 0; }
 592       else if (d < 0xC0) {
 593           /* trailing byte in leading position */
 594           *outlen = (out - outstart) * 2;
 595           *inlen = processed - instart;
 596           return(-2);
 597       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 598       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 599       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 600       else {
 601         /* no chance for this in UTF-16 */
 602         *outlen = (out - outstart) * 2;
 603         *inlen = processed - instart;
 604         return(-2);
 605       }
 606
 607       if (inend - in < trailing) {
 608           break;
 609       }
 610
 611       for ( ; trailing; trailing--) {
 612           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
 613               break;
 614           c <<= 6;
 615           c |= d & 0x3F;
 616       }
 617
 618       /* assertion: c is a single UTF-4 value */
 619         if (c < 0x10000) {
 620             if (out >= outend)
 621                 break;
 622             if (xmlLittleEndian) {
 623                 *out++ = c;
 624             } else {
 625                 tmp = (unsigned char *) out;
 626                 *tmp = c ;
 627                 *(tmp + 1) = c >> 8 ;
 628                 out++;
 629             }
 630         }
 631         else if (c < 0x110000) {
 632             if (out+1 >= outend)
 633                 break;
 634             c -= 0x10000;
 635             if (xmlLittleEndian) {
 636                 *out++ = 0xD800 | (c >> 10);
 637                 *out++ = 0xDC00 | (c & 0x03FF);
 638             } else {
 639                 tmp1 = 0xD800 | (c >> 10);
 640                 tmp = (unsigned char *) out;
 641                 *tmp = (unsigned char) tmp1;
 642                 *(tmp + 1) = tmp1 >> 8;
 643                 out++;
 644
 645                 tmp2 = 0xDC00 | (c & 0x03FF);
 646                 tmp = (unsigned char *) out;
 647                 *tmp  = (unsigned char) tmp2;
 648                 *(tmp + 1) = tmp2 >> 8;
 649                 out++;
 650             }
 651         }
 652         else
 653             break;
 654         processed = in;
 655     }
 656     *outlen = (out - outstart) * 2;
 657     *inlen = processed - instart;
 658     return(*outlen);
 659 }
 660
 661 /**
 662  * UTF8ToUTF16:
 663  * @outb:  a pointer to an array of bytes to store the result
 664  * @outlen:  the length of @outb
 665  * @in:  a pointer to an array of UTF-8 chars
 666  * @inlen:  the length of @in
 667  *
 668  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 669  * block of chars out.
 670  *
 671  * Returns the number of bytes written, or -1 if lack of space, or -2
 672  *     if the transcoding failed.
 673  */
 674 static int
 675 UTF8ToUTF16(unsigned char* outb, int *outlen,
 676             const unsigned char* in, int *inlen)
 677 {
 678     if (in == NULL) {
 679         /*
 680          * initialization, add the Byte Order Mark for UTF-16LE
 681          */
 682         if (*outlen >= 2) {
 683             outb[0] = 0xFF;
 684             outb[1] = 0xFE;
 685             *outlen = 2;
 686             *inlen = 0;
 687 #ifdef DEBUG_ENCODING
 688             xmlGenericError(xmlGenericErrorContext,
 689                     "Added FFFE Byte Order Mark\n");
 690 #endif
 691             return(2);
 692         }
 693         *outlen = 0;
 694         *inlen = 0;
 695         return(0);
 696     }
 697     return (UTF8ToUTF16LE(outb, outlen, in, inlen));
 698 }
 699 #endif /* LIBXML_OUTPUT_ENABLED */
 700
 701 /**
 702  * UTF16BEToUTF8:
 703  * @out:  a pointer to an array of bytes to store the result
 704  * @outlen:  the length of @out
 705  * @inb:  a pointer to an array of UTF-16 passed as a byte array
 706  * @inlenb:  the length of @in in UTF-16 chars
 707  *
 708  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
 709  * block of chars out. This function assumes the endian property
 710  * is the same between the native type of this machine and the
 711  * inputed one.
 712  *
 713  * Returns the number of bytes written, or -1 if lack of space, or -2
 714  *     if the transcoding fails (if *in is not a valid utf16 string)
 715  * The value of *inlen after return is the number of octets consumed
 716  *     if the return value is positive, else unpredictable.
 717  */
 718 static int
 719 UTF16BEToUTF8(unsigned char* out, int *outlen,
 720             const unsigned char* inb, int *inlenb)
 721 {
 722     unsigned char* outstart = out;
 723     const unsigned char* processed = inb;
 724     unsigned char* outend = out + *outlen;
 725     unsigned short* in = (unsigned short*) inb;
 726     unsigned short* inend;
 727     unsigned int c, d, inlen;
 728     unsigned char *tmp;
 729     int bits;
 730
 731     if ((*inlenb % 2) == 1)
 732         (*inlenb)--;
 733     inlen = *inlenb / 2;
 734     inend= in + inlen;
 735     while (in < inend) {
 736         if (xmlLittleEndian) {
 737             tmp = (unsigned char *) in;
 738             c = *tmp++;
 739             c = c << 8;
 740             c = c | (unsigned int) *tmp;
 741             in++;
 742         } else {
 743             c= *in++;
 744         }
 745         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
 746             if (in >= inend) {           /* (in > inend) shouldn't happens */
 747                 *outlen = out - outstart;
 748                 *inlenb = processed - inb;
 749                 return(-2);
 750             }
 751             if (xmlLittleEndian) {
 752                 tmp = (unsigned char *) in;
 753                 d = *tmp++;
 754                 d = d << 8;
 755                 d = d | (unsigned int) *tmp;
 756                 in++;
 757             } else {
 758                 d= *in++;
 759             }
 760             if ((d & 0xFC00) == 0xDC00) {
 761                 c &= 0x03FF;
 762                 c <<= 10;
 763                 c |= d & 0x03FF;
 764                 c += 0x10000;
 765             }
 766             else {
 767                 *outlen = out - outstart;
 768                 *inlenb = processed - inb;
 769                 return(-2);
 770             }
 771         }
 772
 773         /* assertion: c is a single UTF-4 value */
 774         if (out >= outend)
 775             break;
 776         if      (c <    0x80) {  *out++=  c;                bits= -6; }
 777         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
 778         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
 779         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
 780
 781         for ( ; bits >= 0; bits-= 6) {
 782             if (out >= outend)
 783                 break;
 784             *out++= ((c >> bits) & 0x3F) | 0x80;
 785         }
 786         processed = (const unsigned char*) in;
 787     }
 788     *outlen = out - outstart;
 789     *inlenb = processed - inb;
 790     return(*outlen);
 791 }
 792
 793 #ifdef LIBXML_OUTPUT_ENABLED
 794 /**
 795  * UTF8ToUTF16BE:
 796  * @outb:  a pointer to an array of bytes to store the result
 797  * @outlen:  the length of @outb
 798  * @in:  a pointer to an array of UTF-8 chars
 799  * @inlen:  the length of @in
 800  *
 801  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 802  * block of chars out.
 803  *
 804  * Returns the number of byte written, or -1 by lack of space, or -2
 805  *     if the transcoding failed.
 806  */
 807 static int
 808 UTF8ToUTF16BE(unsigned char* outb, int *outlen,
 809             const unsigned char* in, int *inlen)
 810 {
 811     unsigned short* out = (unsigned short*) outb;
 812     const unsigned char* processed = in;
 813     const unsigned char *const instart = in;
 814     unsigned short* outstart= out;
 815     unsigned short* outend;
 816     const unsigned char* inend;
 817     unsigned int c, d;
 818     int trailing;
 819     unsigned char *tmp;
 820     unsigned short tmp1, tmp2;
 821
 822     /* UTF-16BE has no BOM */
 823     if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 824     if (in == NULL) {
 825         *outlen = 0;
 826         *inlen = 0;
 827         return(0);
 828     }
 829     inend= in + *inlen;
 830     outend = out + (*outlen / 2);
 831     while (in < inend) {
 832       d= *in++;
 833       if      (d < 0x80)  { c= d; trailing= 0; }
 834       else if (d < 0xC0)  {
 835           /* trailing byte in leading position */
 836           *outlen = out - outstart;
 837           *inlen = processed - instart;
 838           return(-2);
 839       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 840       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 841       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 842       else {
 843           /* no chance for this in UTF-16 */
 844           *outlen = out - outstart;
 845           *inlen = processed - instart;
 846           return(-2);
 847       }
 848
 849       if (inend - in < trailing) {
 850           break;
 851       }
 852
 853       for ( ; trailing; trailing--) {
 854           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
 855           c <<= 6;
 856           c |= d & 0x3F;
 857       }
 858
 859       /* assertion: c is a single UTF-4 value */
 860         if (c < 0x10000) {
 861             if (out >= outend)  break;
 862             if (xmlLittleEndian) {
 863                 tmp = (unsigned char *) out;
 864                 *tmp = c >> 8;
 865                 *(tmp + 1) = c;
 866                 out++;
 867             } else {
 868                 *out++ = c;
 869             }
 870         }
 871         else if (c < 0x110000) {
 872             if (out+1 >= outend)  break;
 873             c -= 0x10000;
 874             if (xmlLittleEndian) {
 875                 tmp1 = 0xD800 | (c >> 10);
 876                 tmp = (unsigned char *) out;
 877                 *tmp = tmp1 >> 8;
 878                 *(tmp + 1) = (unsigned char) tmp1;
 879                 out++;
 880
 881                 tmp2 = 0xDC00 | (c & 0x03FF);
 882                 tmp = (unsigned char *) out;
 883                 *tmp = tmp2 >> 8;
 884                 *(tmp + 1) = (unsigned char) tmp2;
 885                 out++;
 886             } else {
 887                 *out++ = 0xD800 | (c >> 10);
 888                 *out++ = 0xDC00 | (c & 0x03FF);
 889             }
 890         }
 891         else
 892             break;
 893         processed = in;
 894     }
 895     *outlen = (out - outstart) * 2;
 896     *inlen = processed - instart;
 897     return(*outlen);
 898 }
 899 #endif /* LIBXML_OUTPUT_ENABLED */
 900
 901 /************************************************************************
 902  *                                                                      *
 903  *              Generic encoding handling routines                      *
 904  *                                                                      *
 905  ************************************************************************/
 906
 907 /**
 908  * xmlDetectCharEncoding:
 909  * @in:  a pointer to the first bytes of the XML entity, must be at least
 910  *       2 bytes long (at least 4 if encoding is UTF4 variant).
 911  * @len:  pointer to the length of the buffer
 912  *
 913  * Guess the encoding of the entity using the first bytes of the entity content
 914  * according to the non-normative appendix F of the XML-1.0 recommendation.
 915  *
 916  * Returns one of the XML_CHAR_ENCODING_... values.
 917  */
 918 xmlCharEncoding
 919 xmlDetectCharEncoding(const unsigned char* in, int len)
 920 {
 921     if (in == NULL)
 922         return(XML_CHAR_ENCODING_NONE);
 923     if (len >= 4) {
 924         if ((in[0] == 0x00) && (in[1] == 0x00) &&
 925             (in[2] == 0x00) && (in[3] == 0x3C))
 926             return(XML_CHAR_ENCODING_UCS4BE);
 927         if ((in[0] == 0x3C) && (in[1] == 0x00) &&
 928             (in[2] == 0x00) && (in[3] == 0x00))
 929             return(XML_CHAR_ENCODING_UCS4LE);
 930         if ((in[0] == 0x00) && (in[1] == 0x00) &&
 931             (in[2] == 0x3C) && (in[3] == 0x00))
 932             return(XML_CHAR_ENCODING_UCS4_2143);
 933         if ((in[0] == 0x00) && (in[1] == 0x3C) &&
 934             (in[2] == 0x00) && (in[3] == 0x00))
 935             return(XML_CHAR_ENCODING_UCS4_3412);
 936         if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
 937             (in[2] == 0xA7) && (in[3] == 0x94))
 938             return(XML_CHAR_ENCODING_EBCDIC);
 939         if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
 940             (in[2] == 0x78) && (in[3] == 0x6D))
 941             return(XML_CHAR_ENCODING_UTF8);
 942         /*
 943          * Although not part of the recommendation, we also
 944          * attempt an "auto-recognition" of UTF-16LE and
 945          * UTF-16BE encodings.
 946          */
 947         if ((in[0] == 0x3C) && (in[1] == 0x00) &&
 948             (in[2] == 0x3F) && (in[3] == 0x00))
 949             return(XML_CHAR_ENCODING_UTF16LE);
 950         if ((in[0] == 0x00) && (in[1] == 0x3C) &&
 951             (in[2] == 0x00) && (in[3] == 0x3F))
 952             return(XML_CHAR_ENCODING_UTF16BE);
 953     }
 954     if (len >= 3) {
 955         /*
 956          * Errata on XML-1.0 June 20 2001
 957          * We now allow an UTF8 encoded BOM
 958          */
 959         if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
 960             (in[2] == 0xBF))
 961             return(XML_CHAR_ENCODING_UTF8);
 962     }
 963     /* For UTF-16 we can recognize by the BOM */
 964     if (len >= 2) {
 965         if ((in[0] == 0xFE) && (in[1] == 0xFF))
 966             return(XML_CHAR_ENCODING_UTF16BE);
 967         if ((in[0] == 0xFF) && (in[1] == 0xFE))
 968             return(XML_CHAR_ENCODING_UTF16LE);
 969     }
 970     return(XML_CHAR_ENCODING_NONE);
 971 }
 972
 973 /**
 974  * xmlCleanupEncodingAliases:
 975  *
 976  * Unregisters all aliases
 977  */
 978 void
 979 xmlCleanupEncodingAliases(void) {
 980     int i;
 981
 982     if (xmlCharEncodingAliases == NULL)
 983         return;
 984
 985     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
 986         if (xmlCharEncodingAliases[i].name != NULL)
 987             xmlFree((char *) xmlCharEncodingAliases[i].name);
 988         if (xmlCharEncodingAliases[i].alias != NULL)
 989             xmlFree((char *) xmlCharEncodingAliases[i].alias);
 990     }
 991     xmlCharEncodingAliasesNb = 0;
 992     xmlCharEncodingAliasesMax = 0;
 993     xmlFree(xmlCharEncodingAliases);
 994     xmlCharEncodingAliases = NULL;
 995 }
 996
 997 /**
 998  * xmlGetEncodingAlias:
 999  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1000  *
1001  * Lookup an encoding name for the given alias.
1002  *
1003  * Returns NULL if not found, otherwise the original name
1004  */
1005 const char *
1006 xmlGetEncodingAlias(const char *alias) {
1007     int i;
1008     char upper[100];
1009
1010     if (alias == NULL)
1011         return(NULL);
1012
1013     if (xmlCharEncodingAliases == NULL)
1014         return(NULL);
1015
1016     for (i = 0;i < 99;i++) {
1017         upper[i] = toupper(alias[i]);
1018         if (upper[i] == 0) break;
1019     }
1020     upper[i] = 0;
1021
1022     /*
1023      * Walk down the list looking for a definition of the alias
1024      */
1025     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1026         if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1027             return(xmlCharEncodingAliases[i].name);
1028         }
1029     }
1030     return(NULL);
1031 }
1032
1033 /**
1034  * xmlAddEncodingAlias:
1035  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1036  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1037  *
1038  * Registers an alias @alias for an encoding named @name. Existing alias
1039  * will be overwritten.
1040  *
1041  * Returns 0 in case of success, -1 in case of error
1042  */
1043 int
1044 xmlAddEncodingAlias(const char *name, const char *alias) {
1045     int i;
1046     char upper[100];
1047
1048     if ((name == NULL) || (alias == NULL))
1049         return(-1);
1050
1051     for (i = 0;i < 99;i++) {
1052         upper[i] = toupper(alias[i]);
1053         if (upper[i] == 0) break;
1054     }
1055     upper[i] = 0;
1056
1057     if (xmlCharEncodingAliases == NULL) {
1058         xmlCharEncodingAliasesNb = 0;
1059         xmlCharEncodingAliasesMax = 20;
1060         xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1061               xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1062         if (xmlCharEncodingAliases == NULL)
1063             return(-1);
1064     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1065         xmlCharEncodingAliasesMax *= 2;
1066         xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1067               xmlRealloc(xmlCharEncodingAliases,
1068                          xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1069     }
1070     /*
1071      * Walk down the list looking for a definition of the alias
1072      */
1073     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1074         if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1075             /*
1076              * Replace the definition.
1077              */
1078             xmlFree((char *) xmlCharEncodingAliases[i].name);
1079             xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1080             return(0);
1081         }
1082     }
1083     /*
1084      * Add the definition
1085      */
1086     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1087     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1088     xmlCharEncodingAliasesNb++;
1089     return(0);
1090 }
1091
1092 /**
1093  * xmlDelEncodingAlias:
1094  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1095  *
1096  * Unregisters an encoding alias @alias
1097  *
1098  * Returns 0 in case of success, -1 in case of error
1099  */
1100 int
1101 xmlDelEncodingAlias(const char *alias) {
1102     int i;
1103
1104     if (alias == NULL)
1105         return(-1);
1106
1107     if (xmlCharEncodingAliases == NULL)
1108         return(-1);
1109     /*
1110      * Walk down the list looking for a definition of the alias
1111      */
1112     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1113         if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1114             xmlFree((char *) xmlCharEncodingAliases[i].name);
1115             xmlFree((char *) xmlCharEncodingAliases[i].alias);
1116             xmlCharEncodingAliasesNb--;
1117             memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1118                     sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1119             return(0);
1120         }
1121     }
1122     return(-1);
1123 }
1124
1125 /**
1126  * xmlParseCharEncoding:
1127  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1128  *
1129  * Compare the string to the encoding schemes already known. Note
1130  * that the comparison is case insensitive accordingly to the section
1131  * [XML] 4.3.3 Character Encoding in Entities.
1132  *
1133  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1134  * if not recognized.
1135  */
1136 xmlCharEncoding
1137 xmlParseCharEncoding(const char* name)
1138 {
1139     const char *alias;
1140     char upper[500];
1141     int i;
1142
1143     if (name == NULL)
1144         return(XML_CHAR_ENCODING_NONE);
1145
1146     /*
1147      * Do the alias resolution
1148      */
1149     alias = xmlGetEncodingAlias(name);
1150     if (alias != NULL)
1151         name = alias;
1152
1153     for (i = 0;i < 499;i++) {
1154         upper[i] = toupper(name[i]);
1155         if (upper[i] == 0) break;
1156     }
1157     upper[i] = 0;
1158
1159     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1160     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1161     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1162
1163     /*
1164      * NOTE: if we were able to parse this, the endianness of UTF16 is
1165      *       already found and in use
1166      */
1167     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1168     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1169
1170     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1171     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1172     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1173
1174     /*
1175      * NOTE: if we were able to parse this, the endianness of UCS4 is
1176      *       already found and in use
1177      */
1178     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1179     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1180     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1181
1182
1183     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1184     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1185     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1186
1187     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1188     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1189     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1190
1191     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1192     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1193     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1194     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1195     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1196     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1197     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1198
1199     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1200     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1201     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1202
1203 #ifdef DEBUG_ENCODING
1204     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1205 #endif
1206     return(XML_CHAR_ENCODING_ERROR);
1207 }
1208
1209 /**
1210  * xmlGetCharEncodingName:
1211  * @enc:  the encoding
1212  *
1213  * The "canonical" name for XML encoding.
1214  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1215  * Section 4.3.3  Character Encoding in Entities
1216  *
1217  * Returns the canonical name for the given encoding
1218  */
1219
1220 const char*
1221 xmlGetCharEncodingName(xmlCharEncoding enc) {
1222     switch (enc) {
1223         case XML_CHAR_ENCODING_ERROR:
1224             return(NULL);
1225         case XML_CHAR_ENCODING_NONE:
1226             return(NULL);
1227         case XML_CHAR_ENCODING_UTF8:
1228             return("UTF-8");
1229         case XML_CHAR_ENCODING_UTF16LE:
1230             return("UTF-16");
1231         case XML_CHAR_ENCODING_UTF16BE:
1232             return("UTF-16");
1233         case XML_CHAR_ENCODING_EBCDIC:
1234             return("EBCDIC");
1235         case XML_CHAR_ENCODING_UCS4LE:
1236             return("ISO-10646-UCS-4");
1237         case XML_CHAR_ENCODING_UCS4BE:
1238             return("ISO-10646-UCS-4");
1239         case XML_CHAR_ENCODING_UCS4_2143:
1240             return("ISO-10646-UCS-4");
1241         case XML_CHAR_ENCODING_UCS4_3412:
1242             return("ISO-10646-UCS-4");
1243         case XML_CHAR_ENCODING_UCS2:
1244             return("ISO-10646-UCS-2");
1245         case XML_CHAR_ENCODING_8859_1:
1246             return("ISO-8859-1");
1247         case XML_CHAR_ENCODING_8859_2:
1248             return("ISO-8859-2");
1249         case XML_CHAR_ENCODING_8859_3:
1250             return("ISO-8859-3");
1251         case XML_CHAR_ENCODING_8859_4:
1252             return("ISO-8859-4");
1253         case XML_CHAR_ENCODING_8859_5:
1254             return("ISO-8859-5");
1255         case XML_CHAR_ENCODING_8859_6:
1256             return("ISO-8859-6");
1257         case XML_CHAR_ENCODING_8859_7:
1258             return("ISO-8859-7");
1259         case XML_CHAR_ENCODING_8859_8:
1260             return("ISO-8859-8");
1261         case XML_CHAR_ENCODING_8859_9:
1262             return("ISO-8859-9");
1263         case XML_CHAR_ENCODING_2022_JP:
1264             return("ISO-2022-JP");
1265         case XML_CHAR_ENCODING_SHIFT_JIS:
1266             return("Shift-JIS");
1267         case XML_CHAR_ENCODING_EUC_JP:
1268             return("EUC-JP");
1269         case XML_CHAR_ENCODING_ASCII:
1270             return(NULL);
1271     }
1272     return(NULL);
1273 }
1274
1275 /************************************************************************
1276  *                                                                      *
1277  *                      Char encoding handlers                          *
1278  *                                                                      *
1279  ************************************************************************/
1280
1281
/*
 * Registry of the encoding handlers: an array of MAX_ENCODING_HANDLERS
 * slots allocated by xmlInitCharEncodingHandlers(), of which the first
 * nbCharEncodingHandler are in use.
 * The size should be growable, but it's not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * xmlFindCharEncodingHandler() returns this value when it is given a
 * NULL or empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1293
1294 /**
1295  * xmlNewCharEncodingHandler:
1296  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1297  * @input:  the xmlCharEncodingInputFunc to read that encoding
1298  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1299  *
1300  * Create and registers an xmlCharEncodingHandler.
1301  *
1302  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1303  */
1304 xmlCharEncodingHandlerPtr
1305 xmlNewCharEncodingHandler(const char *name,
1306                           xmlCharEncodingInputFunc input,
1307                           xmlCharEncodingOutputFunc output) {
1308     xmlCharEncodingHandlerPtr handler;
1309     const char *alias;
1310     char upper[500];
1311     int i;
1312     char *up = NULL;
1313
1314     /*
1315      * Do the alias resolution
1316      */
1317     alias = xmlGetEncodingAlias(name);
1318     if (alias != NULL)
1319         name = alias;
1320
1321     /*
1322      * Keep only the uppercase version of the encoding.
1323      */
1324     if (name == NULL) {
1325         xmlEncodingErr(XML_I18N_NO_NAME,
1326                        "xmlNewCharEncodingHandler : no name !\n", NULL);
1327         return(NULL);
1328     }
1329     for (i = 0;i < 499;i++) {
1330         upper[i] = toupper(name[i]);
1331         if (upper[i] == 0) break;
1332     }
1333     upper[i] = 0;
1334     up = xmlMemStrdup(upper);
1335     if (up == NULL) {
1336         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1337         return(NULL);
1338     }
1339
1340     /*
1341      * allocate and fill-up an handler block.
1342      */
1343     handler = (xmlCharEncodingHandlerPtr)
1344               xmlMalloc(sizeof(xmlCharEncodingHandler));
1345     if (handler == NULL) {
1346         xmlFree(up);
1347         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1348         return(NULL);
1349     }
1350     memset(handler, 0, sizeof(xmlCharEncodingHandler));
1351     handler->input = input;
1352     handler->output = output;
1353     handler->name = up;
1354
1355 #ifdef LIBXML_ICONV_ENABLED
1356     handler->iconv_in = NULL;
1357     handler->iconv_out = NULL;
1358 #endif
1359 #ifdef LIBXML_ICU_ENABLED
1360     handler->uconv_in = NULL;
1361     handler->uconv_out = NULL;
1362 #endif
1363
1364     /*
1365      * registers and returns the handler.
1366      */
1367     xmlRegisterCharEncodingHandler(handler);
1368 #ifdef DEBUG_ENCODING
1369     xmlGenericError(xmlGenericErrorContext,
1370             "Registered encoding handler for %s\n", name);
1371 #endif
1372     return(handler);
1373 }
1374
1375 /**
1376  * xmlInitCharEncodingHandlers:
1377  *
1378  * Initialize the char encoding support, it registers the default
1379  * encoding supported.
1380  * NOTE: while public, this function usually doesn't need to be called
1381  *       in normal processing.
1382  */
1383 void
1384 xmlInitCharEncodingHandlers(void) {
1385     unsigned short int tst = 0x1234;
1386     unsigned char *ptr = (unsigned char *) &tst;
1387
1388     if (handlers != NULL) return;
1389
1390     handlers = (xmlCharEncodingHandlerPtr *)
1391         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1392
1393     if (*ptr == 0x12) xmlLittleEndian = 0;
1394     else if (*ptr == 0x34) xmlLittleEndian = 1;
1395     else {
1396         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1397                        "Odd problem at endianness detection\n", NULL);
1398     }
1399
1400     if (handlers == NULL) {
1401         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1402         return;
1403     }
1404     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1405 #ifdef LIBXML_OUTPUT_ENABLED
1406     xmlUTF16LEHandler =
1407           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1408     xmlUTF16BEHandler =
1409           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1410     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1411     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1412     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1413     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1414 #ifdef LIBXML_HTML_ENABLED
1415     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1416 #endif
1417 #else
1418     xmlUTF16LEHandler =
1419           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1420     xmlUTF16BEHandler =
1421           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1422     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1423     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1424     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1425     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1426 #endif /* LIBXML_OUTPUT_ENABLED */
1427 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1428 #ifdef LIBXML_ISO8859X_ENABLED
1429     xmlRegisterCharEncodingHandlersISO8859x ();
1430 #endif
1431 #endif
1432
1433 }
1434
1435 /**
1436  * xmlCleanupCharEncodingHandlers:
1437  *
1438  * Cleanup the memory allocated for the char encoding support, it
1439  * unregisters all the encoding handlers and the aliases.
1440  */
1441 void
1442 xmlCleanupCharEncodingHandlers(void) {
1443     xmlCleanupEncodingAliases();
1444
1445     if (handlers == NULL) return;
1446
1447     for (;nbCharEncodingHandler > 0;) {
1448         nbCharEncodingHandler--;
1449         if (handlers[nbCharEncodingHandler] != NULL) {
1450             if (handlers[nbCharEncodingHandler]->name != NULL)
1451                 xmlFree(handlers[nbCharEncodingHandler]->name);
1452             xmlFree(handlers[nbCharEncodingHandler]);
1453         }
1454     }
1455     xmlFree(handlers);
1456     handlers = NULL;
1457     nbCharEncodingHandler = 0;
1458     xmlDefaultCharEncodingHandler = NULL;
1459 }
1460
1461 /**
1462  * xmlRegisterCharEncodingHandler:
1463  * @handler:  the xmlCharEncodingHandlerPtr handler block
1464  *
1465  * Register the char encoding handler, surprising, isn't it ?
1466  */
1467 void
1468 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1469     if (handlers == NULL) xmlInitCharEncodingHandlers();
1470     if ((handler == NULL) || (handlers == NULL)) {
1471         xmlEncodingErr(XML_I18N_NO_HANDLER,
1472                 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1473         return;
1474     }
1475
1476     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1477         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1478         "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1479                        "MAX_ENCODING_HANDLERS");
1480         return;
1481     }
1482     handlers[nbCharEncodingHandler++] = handler;
1483 }
1484
1485 /**
1486  * xmlGetCharEncodingHandler:
1487  * @enc:  an xmlCharEncoding value.
1488  *
1489  * Search in the registered set the handler able to read/write that encoding.
1490  *
1491  * Returns the handler or NULL if not found
1492  */
1493 xmlCharEncodingHandlerPtr
1494 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1495     xmlCharEncodingHandlerPtr handler;
1496
1497     if (handlers == NULL) xmlInitCharEncodingHandlers();
1498     switch (enc) {
1499         case XML_CHAR_ENCODING_ERROR:
1500             return(NULL);
1501         case XML_CHAR_ENCODING_NONE:
1502             return(NULL);
1503         case XML_CHAR_ENCODING_UTF8:
1504             return(NULL);
1505         case XML_CHAR_ENCODING_UTF16LE:
1506             return(xmlUTF16LEHandler);
1507         case XML_CHAR_ENCODING_UTF16BE:
1508             return(xmlUTF16BEHandler);
1509         case XML_CHAR_ENCODING_EBCDIC:
1510             handler = xmlFindCharEncodingHandler("EBCDIC");
1511             if (handler != NULL) return(handler);
1512             handler = xmlFindCharEncodingHandler("ebcdic");
1513             if (handler != NULL) return(handler);
1514             handler = xmlFindCharEncodingHandler("EBCDIC-US");
1515             if (handler != NULL) return(handler);
1516             break;
1517         case XML_CHAR_ENCODING_UCS4BE:
1518             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1519             if (handler != NULL) return(handler);
1520             handler = xmlFindCharEncodingHandler("UCS-4");
1521             if (handler != NULL) return(handler);
1522             handler = xmlFindCharEncodingHandler("UCS4");
1523             if (handler != NULL) return(handler);
1524             break;
1525         case XML_CHAR_ENCODING_UCS4LE:
1526             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1527             if (handler != NULL) return(handler);
1528             handler = xmlFindCharEncodingHandler("UCS-4");
1529             if (handler != NULL) return(handler);
1530             handler = xmlFindCharEncodingHandler("UCS4");
1531             if (handler != NULL) return(handler);
1532             break;
1533         case XML_CHAR_ENCODING_UCS4_2143:
1534             break;
1535         case XML_CHAR_ENCODING_UCS4_3412:
1536             break;
1537         case XML_CHAR_ENCODING_UCS2:
1538             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1539             if (handler != NULL) return(handler);
1540             handler = xmlFindCharEncodingHandler("UCS-2");
1541             if (handler != NULL) return(handler);
1542             handler = xmlFindCharEncodingHandler("UCS2");
1543             if (handler != NULL) return(handler);
1544             break;
1545
1546             /*
1547              * We used to keep ISO Latin encodings native in the
1548              * generated data. This led to so many problems that
1549              * this has been removed. One can still change this
1550              * back by registering no-ops encoders for those
1551              */
1552         case XML_CHAR_ENCODING_8859_1:
1553             handler = xmlFindCharEncodingHandler("ISO-8859-1");
1554             if (handler != NULL) return(handler);
1555             break;
1556         case XML_CHAR_ENCODING_8859_2:
1557             handler = xmlFindCharEncodingHandler("ISO-8859-2");
1558             if (handler != NULL) return(handler);
1559             break;
1560         case XML_CHAR_ENCODING_8859_3:
1561             handler = xmlFindCharEncodingHandler("ISO-8859-3");
1562             if (handler != NULL) return(handler);
1563             break;
1564         case XML_CHAR_ENCODING_8859_4:
1565             handler = xmlFindCharEncodingHandler("ISO-8859-4");
1566             if (handler != NULL) return(handler);
1567             break;
1568         case XML_CHAR_ENCODING_8859_5:
1569             handler = xmlFindCharEncodingHandler("ISO-8859-5");
1570             if (handler != NULL) return(handler);
1571             break;
1572         case XML_CHAR_ENCODING_8859_6:
1573             handler = xmlFindCharEncodingHandler("ISO-8859-6");
1574             if (handler != NULL) return(handler);
1575             break;
1576         case XML_CHAR_ENCODING_8859_7:
1577             handler = xmlFindCharEncodingHandler("ISO-8859-7");
1578             if (handler != NULL) return(handler);
1579             break;
1580         case XML_CHAR_ENCODING_8859_8:
1581             handler = xmlFindCharEncodingHandler("ISO-8859-8");
1582             if (handler != NULL) return(handler);
1583             break;
1584         case XML_CHAR_ENCODING_8859_9:
1585             handler = xmlFindCharEncodingHandler("ISO-8859-9");
1586             if (handler != NULL) return(handler);
1587             break;
1588
1589
1590         case XML_CHAR_ENCODING_2022_JP:
1591             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1592             if (handler != NULL) return(handler);
1593             break;
1594         case XML_CHAR_ENCODING_SHIFT_JIS:
1595             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1596             if (handler != NULL) return(handler);
1597             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1598             if (handler != NULL) return(handler);
1599             handler = xmlFindCharEncodingHandler("Shift_JIS");
1600             if (handler != NULL) return(handler);
1601             break;
1602         case XML_CHAR_ENCODING_EUC_JP:
1603             handler = xmlFindCharEncodingHandler("EUC-JP");
1604             if (handler != NULL) return(handler);
1605             break;
1606         default:
1607             break;
1608     }
1609
1610 #ifdef DEBUG_ENCODING
1611     xmlGenericError(xmlGenericErrorContext,
1612             "No handler found for encoding %d\n", enc);
1613 #endif
1614     return(NULL);
1615 }
1616
1617 /**
1618  * xmlFindCharEncodingHandler:
1619  * @name:  a string describing the char encoding.
1620  *
1621  * Search in the registered set the handler able to read/write that encoding.
1622  *
1623  * Returns the handler or NULL if not found
1624  */
1625 xmlCharEncodingHandlerPtr
1626 xmlFindCharEncodingHandler(const char *name) {
1627     const char *nalias;
1628     const char *norig;
1629     xmlCharEncoding alias;
1630 #ifdef LIBXML_ICONV_ENABLED
1631     xmlCharEncodingHandlerPtr enc;
1632     iconv_t icv_in, icv_out;
1633 #endif /* LIBXML_ICONV_ENABLED */
1634 #ifdef LIBXML_ICU_ENABLED
1635     xmlCharEncodingHandlerPtr encu;
1636     uconv_t *ucv_in, *ucv_out;
1637 #endif /* LIBXML_ICU_ENABLED */
1638     char upper[100];
1639     int i;
1640
1641     if (handlers == NULL) xmlInitCharEncodingHandlers();
1642     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1643     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1644
1645     /*
1646      * Do the alias resolution
1647      */
1648     norig = name;
1649     nalias = xmlGetEncodingAlias(name);
1650     if (nalias != NULL)
1651         name = nalias;
1652
1653     /*
1654      * Check first for directly registered encoding names
1655      */
1656     for (i = 0;i < 99;i++) {
1657         upper[i] = toupper(name[i]);
1658         if (upper[i] == 0) break;
1659     }
1660     upper[i] = 0;
1661
1662     if (handlers != NULL) {
1663         for (i = 0;i < nbCharEncodingHandler; i++) {
1664             if (!strcmp(upper, handlers[i]->name)) {
1665 #ifdef DEBUG_ENCODING
1666                 xmlGenericError(xmlGenericErrorContext,
1667                         "Found registered handler for encoding %s\n", name);
1668 #endif
1669                 return(handlers[i]);
1670             }
1671         }
1672     }
1673
1674 #ifdef LIBXML_ICONV_ENABLED
1675     /* check whether iconv can handle this */
1676     icv_in = iconv_open("UTF-8", name);
1677     icv_out = iconv_open(name, "UTF-8");
1678     if (icv_in == (iconv_t) -1) {
1679         icv_in = iconv_open("UTF-8", upper);
1680     }
1681     if (icv_out == (iconv_t) -1) {
1682         icv_out = iconv_open(upper, "UTF-8");
1683     }
1684     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1685             enc = (xmlCharEncodingHandlerPtr)
1686                   xmlMalloc(sizeof(xmlCharEncodingHandler));
1687             if (enc == NULL) {
1688                 iconv_close(icv_in);
1689                 iconv_close(icv_out);
1690                 return(NULL);
1691             }
1692             memset(enc, 0, sizeof(xmlCharEncodingHandler));
1693             enc->name = xmlMemStrdup(name);
1694             enc->input = NULL;
1695             enc->output = NULL;
1696             enc->iconv_in = icv_in;
1697             enc->iconv_out = icv_out;
1698 #ifdef DEBUG_ENCODING
1699             xmlGenericError(xmlGenericErrorContext,
1700                     "Found iconv handler for encoding %s\n", name);
1701 #endif
1702             return enc;
1703     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1704             xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1705                     "iconv : problems with filters for '%s'\n", name);
1706     }
1707 #endif /* LIBXML_ICONV_ENABLED */
1708 #ifdef LIBXML_ICU_ENABLED
1709     /* check whether icu can handle this */
1710     ucv_in = openIcuConverter(name, 1);
1711     ucv_out = openIcuConverter(name, 0);
1712     if (ucv_in != NULL && ucv_out != NULL) {
1713             encu = (xmlCharEncodingHandlerPtr)
1714                    xmlMalloc(sizeof(xmlCharEncodingHandler));
1715             if (encu == NULL) {
1716                 closeIcuConverter(ucv_in);
1717                 closeIcuConverter(ucv_out);
1718                 return(NULL);
1719             }
1720             memset(encu, 0, sizeof(xmlCharEncodingHandler));
1721             encu->name = xmlMemStrdup(name);
1722             encu->input = NULL;
1723             encu->output = NULL;
1724             encu->uconv_in = ucv_in;
1725             encu->uconv_out = ucv_out;
1726 #ifdef DEBUG_ENCODING
1727             xmlGenericError(xmlGenericErrorContext,
1728                     "Found ICU converter handler for encoding %s\n", name);
1729 #endif
1730             return encu;
1731     } else if (ucv_in != NULL || ucv_out != NULL) {
1732             closeIcuConverter(ucv_in);
1733             closeIcuConverter(ucv_out);
1734             xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1735                     "ICU converter : problems with filters for '%s'\n", name);
1736     }
1737 #endif /* LIBXML_ICU_ENABLED */
1738
1739 #ifdef DEBUG_ENCODING
1740     xmlGenericError(xmlGenericErrorContext,
1741             "No handler found for encoding %s\n", name);
1742 #endif
1743
1744     /*
1745      * Fallback using the canonical names
1746      */
1747     alias = xmlParseCharEncoding(norig);
1748     if (alias != XML_CHAR_ENCODING_ERROR) {
1749         const char* canon;
1750         canon = xmlGetCharEncodingName(alias);
1751         if ((canon != NULL) && (strcmp(name, canon))) {
1752             return(xmlFindCharEncodingHandler(canon));
1753         }
1754     }
1755
1756     /* If "none of the above", give up */
1757     return(NULL);
1758 }
1759
1760 /************************************************************************
1761  *                                                                      *
1762  *              ICONV based generic conversion functions                *
1763  *                                                                      *
1764  ************************************************************************/
1765
1766 #ifdef LIBXML_ICONV_ENABLED
1767 /**
1768  * xmlIconvWrapper:
1769  * @cd:         iconv converter data structure
1770  * @out:  a pointer to an array of bytes to store the result
1771  * @outlen:  the length of @out
1772  * @in:  a pointer to an array of ISO Latin 1 chars
1773  * @inlen:  the length of @in
1774  *
1775  * Returns 0 if success, or
1776  *     -1 by lack of space, or
1777  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1778  *        the result of transformation can't fit into the encoding we want), or
1779  *     -3 if there the last byte can't form a single output char.
1780  *
1781  * The value of @inlen after return is the number of octets consumed
1782  *     as the return value is positive, else unpredictable.
1783  * The value of @outlen after return is the number of ocetes consumed.
1784  */
1785 static int
1786 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1787                 const unsigned char *in, int *inlen) {
1788     size_t icv_inlen, icv_outlen;
1789     const char *icv_in = (const char *) in;
1790     char *icv_out = (char *) out;
1791     int ret;
1792
1793     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1794         if (outlen != NULL) *outlen = 0;
1795         return(-1);
1796     }
1797     icv_inlen = *inlen;
1798     icv_outlen = *outlen;
1799     ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1800     *inlen -= icv_inlen;
1801     *outlen -= icv_outlen;
1802     if ((icv_inlen != 0) || (ret == -1)) {
1803 #ifdef EILSEQ
1804         if (errno == EILSEQ) {
1805             return -2;
1806         } else
1807 #endif
1808 #ifdef E2BIG
1809         if (errno == E2BIG) {
1810             return -1;
1811         } else
1812 #endif
1813 #ifdef EINVAL
1814         if (errno == EINVAL) {
1815             return -3;
1816         } else
1817 #endif
1818         {
1819             return -3;
1820         }
1821     }
1822     return 0;
1823 }
1824 #endif /* LIBXML_ICONV_ENABLED */
1825
1826 /************************************************************************
1827  *                                                                      *
1828  *              ICU based generic conversion functions                  *
1829  *                                                                      *
1830  ************************************************************************/
1831
1832 #ifdef LIBXML_ICU_ENABLED
1833 /**
1834  * xmlUconvWrapper:
1835  * @cd: ICU uconverter data structure
1836  * @toUnicode : non-zero if toUnicode. 0 otherwise.
1837  * @out:  a pointer to an array of bytes to store the result
1838  * @outlen:  the length of @out
1839  * @in:  a pointer to an array of ISO Latin 1 chars
1840  * @inlen:  the length of @in
1841  *
1842  * Returns 0 if success, or
1843  *     -1 by lack of space, or
1844  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1845  *        the result of transformation can't fit into the encoding we want), or
1846  *     -3 if there the last byte can't form a single output char.
1847  *
1848  * The value of @inlen after return is the number of octets consumed
1849  *     as the return value is positive, else unpredictable.
1850  * The value of @outlen after return is the number of ocetes consumed.
1851  */
1852 static int
1853 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1854                 const unsigned char *in, int *inlen) {
1855     const char *ucv_in = (const char *) in;
1856     char *ucv_out = (char *) out;
1857     UErrorCode err = U_ZERO_ERROR;
1858
1859     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1860         if (outlen != NULL) *outlen = 0;
1861         return(-1);
1862     }
1863
1864     /*
1865      * TODO(jungshik)
1866      * 1. is ucnv_convert(To|From)Algorithmic better?
1867      * 2. had we better use an explicit pivot buffer?
1868      * 3. error returned comes from 'fromUnicode' only even
1869      *    when toUnicode is true !
1870      */
1871     if (toUnicode) {
1872         /* encoding => UTF-16 => UTF-8 */
1873         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1874                        &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1875                        0, TRUE, &err);
1876     } else {
1877         /* UTF-8 => UTF-16 => encoding */
1878         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1879                        &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1880                        0, TRUE, &err);
1881     }
1882     *inlen = ucv_in - (const char*) in;
1883     *outlen = ucv_out - (char *) out;
1884     if (U_SUCCESS(err))
1885         return 0;
1886     if (err == U_BUFFER_OVERFLOW_ERROR)
1887         return -1;
1888     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1889         return -2;
1890     /* if (err == U_TRUNCATED_CHAR_FOUND) */
1891     return -3;
1892 }
1893 #endif /* LIBXML_ICU_ENABLED */
1894
1895 /************************************************************************
1896  *                                                                      *
1897  *              The real API used by libxml for on-the-fly conversion   *
1898  *                                                                      *
1899  ************************************************************************/
1900 int
1901 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1902                        xmlBufferPtr in, int len);
1903
1904 /**
1905  * xmlCharEncFirstLineInt:
1906  * @handler:    char enconding transformation data structure
1907  * @out:  an xmlBuffer for the output.
1908  * @in:  an xmlBuffer for the input
1909  * @len:  number of bytes to convert for the first line, or -1
1910  *
1911  * Front-end for the encoding handler input function, but handle only
1912  * the very first line, i.e. limit itself to 45 chars.
1913  *
1914  * Returns the number of byte written if success, or
1915  *     -1 general error
1916  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1917  *        the result of transformation can't fit into the encoding we want), or
1918  */
1919 int
1920 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1921                        xmlBufferPtr in, int len) {
1922     int ret = -2;
1923     int written;
1924     int toconv;
1925
1926     if (handler == NULL) return(-1);
1927     if (out == NULL) return(-1);
1928     if (in == NULL) return(-1);
1929
1930     /* calculate space available */
1931     written = out->size - out->use - 1; /* count '\0' */
1932     toconv = in->use;
1933     /*
1934      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1935      * 45 chars should be sufficient to reach the end of the encoding
1936      * declaration without going too far inside the document content.
1937      * on UTF-16 this means 90bytes, on UCS4 this means 180
1938      * The actual value depending on guessed encoding is passed as @len
1939      * if provided
1940      */
1941     if (len >= 0) {
1942         if (toconv > len)
1943             toconv = len;
1944     } else {
1945         if (toconv > 180)
1946             toconv = 180;
1947     }
1948     if (toconv * 2 >= written) {
1949         xmlBufferGrow(out, toconv);
1950         written = out->size - out->use - 1;
1951     }
1952
1953     if (handler->input != NULL) {
1954         ret = handler->input(&out->content[out->use], &written,
1955                              in->content, &toconv);
1956         xmlBufferShrink(in, toconv);
1957         out->use += written;
1958         out->content[out->use] = 0;
1959     }
1960 #ifdef LIBXML_ICONV_ENABLED
1961     else if (handler->iconv_in != NULL) {
1962         ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1963                               &written, in->content, &toconv);
1964         xmlBufferShrink(in, toconv);
1965         out->use += written;
1966         out->content[out->use] = 0;
1967         if (ret == -1) ret = -3;
1968     }
1969 #endif /* LIBXML_ICONV_ENABLED */
1970 #ifdef LIBXML_ICU_ENABLED
1971     else if (handler->uconv_in != NULL) {
1972         ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
1973                               &written, in->content, &toconv);
1974         xmlBufferShrink(in, toconv);
1975         out->use += written;
1976         out->content[out->use] = 0;
1977         if (ret == -1) ret = -3;
1978     }
1979 #endif /* LIBXML_ICU_ENABLED */
1980 #ifdef DEBUG_ENCODING
1981     switch (ret) {
1982         case 0:
1983             xmlGenericError(xmlGenericErrorContext,
1984                     "converted %d bytes to %d bytes of input\n",
1985                     toconv, written);
1986             break;
1987         case -1:
1988             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1989                     toconv, written, in->use);
1990             break;
1991         case -2:
1992             xmlGenericError(xmlGenericErrorContext,
1993                     "input conversion failed due to input error\n");
1994             break;
1995         case -3:
1996             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1997                     toconv, written, in->use);
1998             break;
1999         default:
2000             xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2001     }
2002 #endif /* DEBUG_ENCODING */
2003     /*
2004      * Ignore when input buffer is not on a boundary
2005      */
2006     if (ret == -3) ret = 0;
2007     if (ret == -1) ret = 0;
2008     return(ret);
2009 }
2010
2011 /**
2012  * xmlCharEncFirstLine:
2013  * @handler:    char enconding transformation data structure
2014  * @out:  an xmlBuffer for the output.
2015  * @in:  an xmlBuffer for the input
2016  *
2017  * Front-end for the encoding handler input function, but handle only
2018  * the very first line, i.e. limit itself to 45 chars.
2019  *
2020  * Returns the number of byte written if success, or
2021  *     -1 general error
2022  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2023  *        the result of transformation can't fit into the encoding we want), or
2024  */
2025 int
2026 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2027                  xmlBufferPtr in) {
2028     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2029 }
2030
2031 /**
2032  * xmlCharEncInFunc:
2033  * @handler:    char encoding transformation data structure
2034  * @out:  an xmlBuffer for the output.
2035  * @in:  an xmlBuffer for the input
2036  *
2037  * Generic front-end for the encoding handler input function
2038  *
2039  * Returns the number of byte written if success, or
2040  *     -1 general error
2041  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2042  *        the result of transformation can't fit into the encoding we want), or
2043  */
2044 int
2045 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2046                  xmlBufferPtr in)
2047 {
2048     int ret = -2;
2049     int written;
2050     int toconv;
2051
2052     if (handler == NULL)
2053         return (-1);
2054     if (out == NULL)
2055         return (-1);
2056     if (in == NULL)
2057         return (-1);
2058
2059     toconv = in->use;
2060     if (toconv == 0)
2061         return (0);
2062     written = out->size - out->use -1; /* count '\0' */
2063     if (toconv * 2 >= written) {
2064         xmlBufferGrow(out, out->size + toconv * 2);
2065         written = out->size - out->use - 1;
2066     }
2067     if (handler->input != NULL) {
2068         ret = handler->input(&out->content[out->use], &written,
2069                              in->content, &toconv);
2070         xmlBufferShrink(in, toconv);
2071         out->use += written;
2072         out->content[out->use] = 0;
2073     }
2074 #ifdef LIBXML_ICONV_ENABLED
2075     else if (handler->iconv_in != NULL) {
2076         ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2077                               &written, in->content, &toconv);
2078         xmlBufferShrink(in, toconv);
2079         out->use += written;
2080         out->content[out->use] = 0;
2081         if (ret == -1)
2082             ret = -3;
2083     }
2084 #endif /* LIBXML_ICONV_ENABLED */
2085 #ifdef LIBXML_ICU_ENABLED
2086     else if (handler->uconv_in != NULL) {
2087         ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
2088                               &written, in->content, &toconv);
2089         xmlBufferShrink(in, toconv);
2090         out->use += written;
2091         out->content[out->use] = 0;
2092         if (ret == -1)
2093             ret = -3;
2094     }
2095 #endif /* LIBXML_ICU_ENABLED */
2096     switch (ret) {
2097         case 0:
2098 #ifdef DEBUG_ENCODING
2099             xmlGenericError(xmlGenericErrorContext,
2100                             "converted %d bytes to %d bytes of input\n",
2101                             toconv, written);
2102 #endif
2103             break;
2104         case -1:
2105 #ifdef DEBUG_ENCODING
2106             xmlGenericError(xmlGenericErrorContext,
2107                          "converted %d bytes to %d bytes of input, %d left\n",
2108                             toconv, written, in->use);
2109 #endif
2110             break;
2111         case -3:
2112 #ifdef DEBUG_ENCODING
2113             xmlGenericError(xmlGenericErrorContext,
2114                         "converted %d bytes to %d bytes of input, %d left\n",
2115                             toconv, written, in->use);
2116 #endif
2117             break;
2118         case -2: {
2119             char buf[50];
2120
2121             snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2122                      in->content[0], in->content[1],
2123                      in->content[2], in->content[3]);
2124             buf[49] = 0;
2125             xmlEncodingErr(XML_I18N_CONV_FAILED,
2126                     "input conversion failed due to input error, bytes %s\n",
2127                            buf);
2128         }
2129     }
2130     /*
2131      * Ignore when input buffer is not on a boundary
2132      */
2133     if (ret == -3)
2134         ret = 0;
2135     return (written? written : ret);
2136 }
2137
2138 /**
2139  * xmlCharEncOutFunc:
2140  * @handler:    char enconding transformation data structure
2141  * @out:  an xmlBuffer for the output.
2142  * @in:  an xmlBuffer for the input
2143  *
2144  * Generic front-end for the encoding handler output function
2145  * a first call with @in == NULL has to be made firs to initiate the
2146  * output in case of non-stateless encoding needing to initiate their
2147  * state or the output (like the BOM in UTF16).
2148  * In case of UTF8 sequence conversion errors for the given encoder,
2149  * the content will be automatically remapped to a CharRef sequence.
2150  *
2151  * Returns the number of byte written if success, or
2152  *     -1 general error
2153  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2154  *        the result of transformation can't fit into the encoding we want), or
2155  */
2156 int
2157 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2158                   xmlBufferPtr in) {
2159     int ret = -2;
2160     int written;
2161     int writtentot = 0;
2162     int toconv;
2163     int output = 0;
2164     int charref_len = 0;
2165
2166     if (handler == NULL) return(-1);
2167     if (out == NULL) return(-1);
2168
2169 retry:
2170
2171     written = out->size - out->use;
2172
2173     if (written > 0)
2174         written--; /* Gennady: count '/0' */
2175
2176     /*
2177      * First specific handling of in = NULL, i.e. the initialization call
2178      */
2179     if (in == NULL) {
2180         toconv = 0;
2181         if (handler->output != NULL) {
2182             ret = handler->output(&out->content[out->use], &written,
2183                                   NULL, &toconv);
2184             if (ret >= 0) { /* Gennady: check return value */
2185                 out->use += written;
2186                 out->content[out->use] = 0;
2187             }
2188         }
2189 #ifdef LIBXML_ICONV_ENABLED
2190         else if (handler->iconv_out != NULL) {
2191             ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2192                                   &written, NULL, &toconv);
2193             out->use += written;
2194             out->content[out->use] = 0;
2195         }
2196 #endif /* LIBXML_ICONV_ENABLED */
2197 #ifdef LIBXML_ICU_ENABLED
2198         else if (handler->uconv_out != NULL) {
2199             ret = xmlUconvWrapper(handler->uconv_out, 0,
2200                               &out->content[out->use],
2201                                               &written, NULL, &toconv);
2202             out->use += written;
2203             out->content[out->use] = 0;
2204         }
2205 #endif /* LIBXML_ICU_ENABLED */
2206 #ifdef DEBUG_ENCODING
2207         xmlGenericError(xmlGenericErrorContext,
2208                 "initialized encoder\n");
2209 #endif
2210         return(0);
2211     }
2212
2213     /*
2214      * Conversion itself.
2215      */
2216     toconv = in->use;
2217     if (toconv == 0)
2218         return(0);
2219     if (toconv * 4 >= written) {
2220         xmlBufferGrow(out, toconv * 4);
2221         written = out->size - out->use - 1;
2222     }
2223     if (handler->output != NULL) {
2224         ret = handler->output(&out->content[out->use], &written,
2225                               in->content, &toconv);
2226         if (written > 0) {
2227             xmlBufferShrink(in, toconv);
2228             out->use += written;
2229             writtentot += written;
2230         }
2231         out->content[out->use] = 0;
2232     }
2233 #ifdef LIBXML_ICONV_ENABLED
2234     else if (handler->iconv_out != NULL) {
2235         ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2236                               &written, in->content, &toconv);
2237         xmlBufferShrink(in, toconv);
2238         out->use += written;
2239         writtentot += written;
2240         out->content[out->use] = 0;
2241         if (ret == -1) {
2242             if (written > 0) {
2243                 /*
2244                  * Can be a limitation of iconv
2245                  */
2246                 charref_len = 0;
2247                 goto retry;
2248             }
2249             ret = -3;
2250         }
2251     }
2252 #endif /* LIBXML_ICONV_ENABLED */
2253 #ifdef LIBXML_ICU_ENABLED
2254     else if (handler->uconv_out != NULL) {
2255         ret = xmlUconvWrapper(handler->uconv_out, 0,
2256                               &out->content[out->use],
2257                               &written, in->content, &toconv);
2258         xmlBufferShrink(in, toconv);
2259         out->use += written;
2260         writtentot += written;
2261         out->content[out->use] = 0;
2262         if (ret == -1) {
2263             if (written > 0) {
2264                 /*
2265                  * Can be a limitation of iconv
2266                  */
2267                 charref_len = 0;
2268                 goto retry;
2269             }
2270             ret = -3;
2271         }
2272     }
2273 #endif /* LIBXML_ICU_ENABLED */
2274     else {
2275         xmlEncodingErr(XML_I18N_NO_OUTPUT,
2276                        "xmlCharEncOutFunc: no output function !\n", NULL);
2277         return(-1);
2278     }
2279
2280     if (ret >= 0) output += ret;
2281
2282     /*
2283      * Attempt to handle error cases
2284      */
2285     switch (ret) {
2286         case 0:
2287 #ifdef DEBUG_ENCODING
2288             xmlGenericError(xmlGenericErrorContext,
2289                     "converted %d bytes to %d bytes of output\n",
2290                     toconv, written);
2291 #endif
2292             break;
2293         case -1:
2294 #ifdef DEBUG_ENCODING
2295             xmlGenericError(xmlGenericErrorContext,
2296                     "output conversion failed by lack of space\n");
2297 #endif
2298             break;
2299         case -3:
2300 #ifdef DEBUG_ENCODING
2301             xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2302                     toconv, written, in->use);
2303 #endif
2304             break;
2305         case -2: {
2306             int len = in->use;
2307             const xmlChar *utf = (const xmlChar *) in->content;
2308             int cur;
2309
2310             cur = xmlGetUTF8Char(utf, &len);
2311             if ((charref_len != 0) && (written < charref_len)) {
2312                 /*
2313                  * We attempted to insert a character reference and failed.
2314                  * Undo what was written and skip the remaining charref.
2315                  */
2316                 out->use -= written;
2317                 writtentot -= written;
2318                 xmlBufferShrink(in, charref_len - written);
2319                 charref_len = 0;
2320
2321                 ret = -1;
2322                 break;
2323             } else if (cur > 0) {
2324                 xmlChar charref[20];
2325
2326 #ifdef DEBUG_ENCODING
2327                 xmlGenericError(xmlGenericErrorContext,
2328                         "handling output conversion error\n");
2329                 xmlGenericError(xmlGenericErrorContext,
2330                         "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2331                         in->content[0], in->content[1],
2332                         in->content[2], in->content[3]);
2333 #endif
2334                 /*
2335                  * Removes the UTF8 sequence, and replace it by a charref
2336                  * and continue the transcoding phase, hoping the error
2337                  * did not mangle the encoder state.
2338                  */
2339                 charref_len = snprintf((char *) &charref[0], sizeof(charref),
2340                                  "&#%d;", cur);
2341                 xmlBufferShrink(in, len);
2342                 xmlBufferAddHead(in, charref, -1);
2343
2344                 goto retry;
2345             } else {
2346                 char buf[50];
2347
2348                 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2349                          in->content[0], in->content[1],
2350                          in->content[2], in->content[3]);
2351                 buf[49] = 0;
2352                 xmlEncodingErr(XML_I18N_CONV_FAILED,
2353                     "output conversion failed due to conv error, bytes %s\n",
2354                                buf);
2355                 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2356                     in->content[0] = ' ';
2357             }
2358             break;
2359         }
2360     }
2361     return(ret);
2362 }
2363
2364 /**
2365  * xmlCharEncCloseFunc:
2366  * @handler:    char enconding transformation data structure
2367  *
2368  * Generic front-end for encoding handler close function
2369  *
2370  * Returns 0 if success, or -1 in case of error
2371  */
2372 int
2373 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2374     int ret = 0;
2375     int tofree = 0;
2376     if (handler == NULL) return(-1);
2377     if (handler->name == NULL) return(-1);
2378 #ifdef LIBXML_ICONV_ENABLED
2379     /*
2380      * Iconv handlers can be used only once, free the whole block.
2381      * and the associated icon resources.
2382      */
2383     if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2384         tofree = 1;
2385         if (handler->iconv_out != NULL) {
2386             if (iconv_close(handler->iconv_out))
2387                 ret = -1;
2388             handler->iconv_out = NULL;
2389         }
2390         if (handler->iconv_in != NULL) {
2391             if (iconv_close(handler->iconv_in))
2392                 ret = -1;
2393             handler->iconv_in = NULL;
2394         }
2395     }
2396 #endif /* LIBXML_ICONV_ENABLED */
2397 #ifdef LIBXML_ICU_ENABLED
2398     if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2399         tofree = 1;
2400         if (handler->uconv_out != NULL) {
2401             closeIcuConverter(handler->uconv_out);
2402             handler->uconv_out = NULL;
2403         }
2404         if (handler->uconv_in != NULL) {
2405             closeIcuConverter(handler->uconv_in);
2406             handler->uconv_in = NULL;
2407         }
2408     }
2409 #endif
2410     if (tofree) {
2411         /* free up only dynamic handlers iconv/uconv */
2412         if (handler->name != NULL)
2413             xmlFree(handler->name);
2414         handler->name = NULL;
2415         xmlFree(handler);
2416     }
2417 #ifdef DEBUG_ENCODING
2418     if (ret)
2419         xmlGenericError(xmlGenericErrorContext,
2420                 "failed to close the encoding handler\n");
2421     else
2422         xmlGenericError(xmlGenericErrorContext,
2423                 "closed the encoding handler\n");
2424 #endif
2425
2426     return(ret);
2427 }
2428
2429 /**
2430  * xmlByteConsumed:
2431  * @ctxt: an XML parser context
2432  *
2433  * This function provides the current index of the parser relative
2434  * to the start of the current entity. This function is computed in
2435  * bytes from the beginning starting at zero and finishing at the
2436  * size in byte of the file if parsing a file. The function is
2437  * of constant cost if the input is UTF-8 but can be costly if run
2438  * on non-UTF-8 input.
2439  *
2440  * Returns the index in bytes from the beginning of the entity or -1
2441  *         in case the index could not be computed.
2442  */
2443 long
2444 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2445     xmlParserInputPtr in;
2446
2447     if (ctxt == NULL) return(-1);
2448     in = ctxt->input;
2449     if (in == NULL)  return(-1);
2450     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2451         unsigned int unused = 0;
2452         xmlCharEncodingHandler * handler = in->buf->encoder;
2453         /*
2454          * Encoding conversion, compute the number of unused original
2455          * bytes from the input not consumed and substract that from
2456          * the raw consumed value, this is not a cheap operation
2457          */
2458         if (in->end - in->cur > 0) {
2459             unsigned char convbuf[32000];
2460             const unsigned char *cur = (const unsigned char *)in->cur;
2461             int toconv = in->end - in->cur, written = 32000;
2462
2463             int ret;
2464
2465             if (handler->output != NULL) {
2466                 do {
2467                     toconv = in->end - cur;
2468                     written = 32000;
2469                     ret = handler->output(&convbuf[0], &written,
2470                                       cur, &toconv);
2471                     if (ret == -1) return(-1);
2472                     unused += written;
2473                     cur += toconv;
2474                 } while (ret == -2);
2475 #ifdef LIBXML_ICONV_ENABLED
2476             } else if (handler->iconv_out != NULL) {
2477                 do {
2478                     toconv = in->end - cur;
2479                     written = 32000;
2480                     ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2481                               &written, cur, &toconv);
2482                     if (ret < 0) {
2483                         if (written > 0)
2484                             ret = -2;
2485                         else
2486                             return(-1);
2487                     }
2488                     unused += written;
2489                     cur += toconv;
2490                 } while (ret == -2);
2491 #endif
2492 #ifdef LIBXML_ICU_ENABLED
2493             } else if (handler->uconv_out != NULL) {
2494                 do {
2495                     toconv = in->end - cur;
2496                     written = 32000;
2497                     ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
2498                               &written, cur, &toconv);
2499                     if (ret < 0) {
2500                         if (written > 0)
2501                             ret = -2;
2502                         else
2503                             return(-1);
2504                     }
2505                     unused += written;
2506                     cur += toconv;
2507                 } while (ret == -2);
2508 #endif
2509             } else {
2510                 /* could not find a converter */
2511                 return(-1);
2512             }
2513         }
2514         if (in->buf->rawconsumed < unused)
2515             return(-1);
2516         return(in->buf->rawconsumed - unused);
2517     }
2518     return(in->consumed + (in->cur - in->base));
2519 }
2520
2521 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2522 #ifdef LIBXML_ISO8859X_ENABLED
2523
2524 /**
2525  * UTF8ToISO8859x:
2526  * @out:  a pointer to an array of bytes to store the result
2527  * @outlen:  the length of @out
2528  * @in:  a pointer to an array of UTF-8 chars
2529  * @inlen:  the length of @in
2530  * @xlattable: the 2-level transcoding table
2531  *
2532  * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2533  * block of chars out.
2534  *
2535  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2536  * The value of @inlen after return is the number of octets consumed
2537  *     as the return value is positive, else unpredictable.
2538  * The value of @outlen after return is the number of octets consumed.
2539  */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    /*
     * Contract implemented below:
     *   - @outlen is read as the capacity of @out and rewritten with the
     *     number of octets produced; @inlen is rewritten with the number of
     *     input octets belonging to fully converted characters.
     *   - returns the number of octets produced, -1 on a NULL argument,
     *     -2 on malformed or untranslatable input, -3 when the input ends
     *     in the middle of a multi-byte sequence.
     *   - conversion stops, without error, when @out is full.
     *
     * Layout expected of @xlattable:
     *   [0..31]   block number for a 2-byte lead, indexed by (lead & 0x1F)
     *   [32..47]  block number for a 3-byte lead, indexed by (lead & 0x0F)
     *   [48..]    64-byte blocks indexed by (trailing byte & 0x3F); for a
     *             3-byte sequence the first trailing byte selects a further
     *             block, the last lookup yields the output byte, 0 meaning
     *             "not in the character set".
     */
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);

    /*
     * Every input character yields exactly one output octet, so checking
     * for room once per iteration is enough to never overrun @out.
     */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;

        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC2) {
            /*
             * 0x80..0xBF: trailing byte in leading position.
             * 0xC0..0xC1: overlong encoding of an ASCII character, which
             * must not be looked up (some tables carry stray entries there).
             */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (in >= inend) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            d = xlattable[48 + (c & 0x3F) + xlattable[d & 0x1F] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            d = xlattable[48 + (c2 & 0x3F) +
                          xlattable[48 + (c1 & 0x3F) +
                                    xlattable[32 + (d & 0x0F)] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        /* only now is the whole character accounted as consumed */
        processed = in;
    }

    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return((ret < 0) ? ret : *outlen);
}
2642
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  table giving, at index (byte - 0x80), the Unicode code
 *                 point of each input byte >= 0x80; a 0 entry marks a byte
 *                 that is undefined in the character set
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out.  Conversion stops early, without error, as soon as
 * the next character does not fit in the space left in @out.
 *
 * Returns the number of octets written to @out, or -1 if an argument is
 *     NULL or an undefined input byte is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;

    while (in < inend) {
        unsigned int c = *in;

        if (c >= 0x80) {
            c = unicodetable[c - 0x80];
            if (c == 0) {
                /* undefined code point */
                ret = -1;
                break;
            }
        }

        /*
         * Check the room actually needed by this character (1, 2 or 3
         * octets) so that an exactly sized buffer can be filled; room is
         * measured as a distance to avoid forming pointers before @out.
         */
        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = (unsigned char) c;
        } else if (c < 0x800) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
            *out++ = (unsigned char) ((c & 0x3F) | 0x80);
        } else {
            if (outend - out < 3)
                break;
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
            *out++ = (unsigned char) ((c & 0x3F) | 0x80);
        }
        /* the input byte is consumed only once its encoding is stored */
        ++in;
    }

    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return((ret < 0) ? ret : *outlen);
}
2708
2709
2710 /************************************************************************
2711  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
2712  ************************************************************************/
2713
/*
 * Forward table for ISO-8859-2: 128 Unicode code points, the first 32 being
 * the identity 0x0080..0x009f.
 * NOTE(review): presumably the @unicodetable given to ISO8859xToUTF8, which
 * looks up input byte b at index (b - 0x80) -- confirm at the call site.
 */
2714 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2715     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2716     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2717     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2718     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2719     0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2720     0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2721     0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2722     0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2723     0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2724     0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2725     0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2726     0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2727     0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2728     0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2729     0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2730     0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2731 };
2732
/*
 * Reverse table for ISO-8859-2: a 48-byte index (first three rows) followed
 * by six 64-byte blocks; block 0 is all zero.  The string literal fills the
 * array exactly, so no terminating NUL is stored.
 * NOTE(review): presumably the @xlattable given to UTF8ToISO8859x, which
 * reads index bytes 0..31 by (2-byte lead & 0x1F), bytes 32..47 by
 * (3-byte lead & 0x0F) and each block by (trailing byte & 0x3F), a final 0
 * meaning "not in the character set" -- confirm at the call site.
 */
2733 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2734     "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2735     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2736     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2737     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2738     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2739     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2740     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2741     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2742     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2743     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2744     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2745     "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2746     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2747     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2748     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2749     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2750     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2751     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2752     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2753     "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2754     "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2755     "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2756     "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2757     "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2758     "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2759     "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2760     "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2761 };
2762
/*
 * Forward table for ISO-8859-3: 128 Unicode code points, the first 32 being
 * the identity 0x0080..0x009f.  Several entries are 0x0000.
 * NOTE(review): presumably the @unicodetable given to ISO8859xToUTF8, which
 * looks up input byte b at index (b - 0x80) and rejects a 0x0000 entry as
 * an undefined code point -- confirm at the call site.
 */
2763 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2764     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2765     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2766     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2767     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2768     0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2769     0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2770     0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2771     0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2772     0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2773     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2774     0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2775     0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2776     0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2777     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2778     0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2779     0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2780 };
2781
/*
 * Reverse table for ISO-8859-3: a 48-byte index (first three rows) followed
 * by seven 64-byte blocks; block 0 is all zero.  The string literal fills
 * the array exactly, so no terminating NUL is stored.
 * NOTE(review): presumably the @xlattable given to UTF8ToISO8859x, which
 * reads index bytes 0..31 by (2-byte lead & 0x1F), bytes 32..47 by
 * (3-byte lead & 0x0F) and each block by (trailing byte & 0x3F), a final 0
 * meaning "not in the character set" -- confirm at the call site.
 * NOTE(review): index entry 0 selects block 4, whose first byte is 0xf0, so
 * the overlong sequence C0 80 would transcode to 0xf0; this looks like an
 * artifact of the 0x0000 slots of the forward table -- confirm.
 */
2782 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2783     "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2784     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2785     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2786     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2787     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2788     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2789     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2790     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2791     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2792     "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2793     "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2794     "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2795     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2796     "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2797     "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2798     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2799     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2800     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2801     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2802     "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2803     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2804     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2805     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2806     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2807     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2808     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2809     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2810     "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2811     "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2812     "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2813     "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2814 };
2815
/*
 * Forward table for ISO-8859-4: 128 Unicode code points, the first 32 being
 * the identity 0x0080..0x009f.
 * NOTE(review): presumably the @unicodetable given to ISO8859xToUTF8, which
 * looks up input byte b at index (b - 0x80) -- confirm at the call site.
 */
2816 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2817     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2818     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2819     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2820     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2821     0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2822     0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2823     0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2824     0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2825     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2826     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2827     0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2828     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2829     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2830     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2831     0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2832     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2833 };
2834
/*
 * Reverse table for ISO-8859-4: a 48-byte index (first three rows) followed
 * by six 64-byte blocks; block 0 is all zero.  The string literal fills the
 * array exactly, so no terminating NUL is stored.
 * NOTE(review): presumably the @xlattable given to UTF8ToISO8859x, which
 * reads index bytes 0..31 by (2-byte lead & 0x1F), bytes 32..47 by
 * (3-byte lead & 0x0F) and each block by (trailing byte & 0x3F), a final 0
 * meaning "not in the character set" -- confirm at the call site.
 */
2835 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2836     "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2837     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2838     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2839     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2840     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2841     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2842     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2843     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2844     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2845     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2846     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2847     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2848     "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2849     "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2850     "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2851     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2852     "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2853     "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2854     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2855     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2856     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2857     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2858     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2859     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2860     "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2861     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2862     "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2863 };
2864
/*
 * Forward table for ISO-8859-5: 128 Unicode code points, the first 32 being
 * the identity 0x0080..0x009f.
 * NOTE(review): presumably the @unicodetable given to ISO8859xToUTF8, which
 * looks up input byte b at index (b - 0x80) -- confirm at the call site.
 */
2865 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2866     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2867     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2868     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2869     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2870     0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2871     0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2872     0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2873     0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2874     0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2875     0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2876     0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2877     0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2878     0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2879     0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2880     0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2881     0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2882 };
2883
/*
 * Reverse table for ISO-8859-5: a 48-byte index (first three rows) followed
 * by six 64-byte blocks; block 0 is all zero.  The string literal fills the
 * array exactly, so no terminating NUL is stored.
 * NOTE(review): presumably the @xlattable given to UTF8ToISO8859x, which
 * reads index bytes 0..31 by (2-byte lead & 0x1F), bytes 32..47 by
 * (3-byte lead & 0x0F) and each block by (trailing byte & 0x3F), a final 0
 * meaning "not in the character set" -- confirm at the call site.
 */
2884 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2885     "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2886     "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2887     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2888     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2890     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2891     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2892     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2893     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2894     "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2895     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2896     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2897     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2898     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2899     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2900     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2901     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2902     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2903     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2904     "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2905     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2906     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2907     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2908     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2909     "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2910     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2911     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2912 };
2913
/*
 * Forward table for ISO-8859-6: 128 Unicode code points, the first 32 being
 * the identity 0x0080..0x009f.  Many entries are 0x0000.
 * NOTE(review): presumably the @unicodetable given to ISO8859xToUTF8, which
 * looks up input byte b at index (b - 0x80) and rejects a 0x0000 entry as
 * an undefined code point -- confirm at the call site.
 */
2914 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2915     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2916     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2917     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2918     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2919     0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2920     0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2921     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2922     0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2923     0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2924     0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2925     0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2926     0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2927     0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2928     0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2929     0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2930     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2931 };
2932
/*
 * Reverse table for ISO-8859-6: a 48-byte index (first three rows) followed
 * by five 64-byte blocks; block 0 is all zero.  The string literal fills
 * the array exactly, so no terminating NUL is stored.
 * NOTE(review): presumably the @xlattable given to UTF8ToISO8859x, which
 * reads index bytes 0..31 by (2-byte lead & 0x1F), bytes 32..47 by
 * (3-byte lead & 0x0F) and each block by (trailing byte & 0x3F), a final 0
 * meaning "not in the character set" -- confirm at the call site.
 * NOTE(review): index entry 0 selects block 2, whose first byte is 0xff, so
 * the overlong sequence C0 80 would transcode to 0xff; this looks like an
 * artifact of the 0x0000 slots of the forward table -- confirm.
 */
2933 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2934     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2935     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2936     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2937     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2938     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2939     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2940     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2941     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2942     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2943     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2944     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2945     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2946     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2947     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2948     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2949     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2950     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2951     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2952     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2953     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2954     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2955     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2956     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2957 };
2958
/*
 * Forward table for ISO-8859-7: 128 Unicode code points, the first 32 being
 * the identity 0x0080..0x009f.  Several entries are 0x0000.
 * NOTE(review): presumably the @unicodetable given to ISO8859xToUTF8, which
 * looks up input byte b at index (b - 0x80) and rejects a 0x0000 entry as
 * an undefined code point -- confirm at the call site.
 */
2959 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2960     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2961     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2962     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2963     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2964     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2965     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2966     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2967     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2968     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2969     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2970     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2971     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2972     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2973     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2974     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2975     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2976 };
2977
/*
 * Reverse table for ISO-8859-7: a 48-byte index (first three rows) followed
 * by seven 64-byte blocks; block 0 is all zero.  The string literal fills
 * the array exactly, so no terminating NUL is stored.
 * NOTE(review): presumably the @xlattable given to UTF8ToISO8859x, which
 * reads index bytes 0..31 by (2-byte lead & 0x1F), bytes 32..47 by
 * (3-byte lead & 0x0F) and each block by (trailing byte & 0x3F), a final 0
 * meaning "not in the character set" -- confirm at the call site.
 * NOTE(review): index entry 0 selects block 4, whose first byte is 0xff, so
 * the overlong sequence C0 80 would transcode to 0xff; this looks like an
 * artifact of the 0x0000 slots of the forward table -- confirm.
 */
2978 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2979     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2980     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2981     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2982     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2983     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2984     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2985     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2987     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2988     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2989     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2990     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2991     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2993     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2994     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2995     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2996     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3001     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3002     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3003     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3004     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3005     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3006     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3007     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3009     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3010 };
3011
/*
 * Forward table for ISO-8859-8: 128 Unicode code points, the first 32 being
 * the identity 0x0080..0x009f.  Many entries are 0x0000.
 * NOTE(review): presumably the @unicodetable given to ISO8859xToUTF8, which
 * looks up input byte b at index (b - 0x80) and rejects a 0x0000 entry as
 * an undefined code point -- confirm at the call site.
 */
3012 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3013     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3014     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3015     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3016     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3017     0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3018     0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3019     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3020     0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3021     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3022     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3023     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3024     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3025     0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3026     0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3027     0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3028     0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3029 };
3030
/*
 * Reverse table for ISO-8859-8: a 48-byte index (first three rows) followed
 * by seven 64-byte blocks; block 0 is all zero.  The string literal fills
 * the array exactly, so no terminating NUL is stored.
 * NOTE(review): presumably the @xlattable given to UTF8ToISO8859x, which
 * reads index bytes 0..31 by (2-byte lead & 0x1F), bytes 32..47 by
 * (3-byte lead & 0x0F) and each block by (trailing byte & 0x3F), a final 0
 * meaning "not in the character set" -- confirm at the call site.
 * NOTE(review): index entry 0 selects block 2, whose first byte is 0xff, so
 * the overlong sequence C0 80 would transcode to 0xff; this looks like an
 * artifact of the 0x0000 slots of the forward table -- confirm.
 */
3031 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3032     "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3033     "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3034     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3035     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3036     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3037     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3038     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3039     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3040     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3041     "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3042     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3043     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3044     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3045     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3047     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048     "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3049     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050     "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3051     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3052     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3053     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3054     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3055     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3056     "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3057     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3058     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3061     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3062     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063 };
3064
/*
 * Forward table for ISO-8859-9: 128 Unicode code points, the first 32 being
 * the identity 0x0080..0x009f.
 * NOTE(review): presumably the @unicodetable given to ISO8859xToUTF8, which
 * looks up input byte b at index (b - 0x80) -- confirm at the call site.
 */
3065 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3066     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3067     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3068     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3069     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3070     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3071     0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3072     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3073     0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3074     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3075     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3076     0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3077     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3078     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3079     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3080     0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3081     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3082 };
3083
/*
 * Reverse table for ISO-8859-9: a 48-byte index (first three rows) followed
 * by five 64-byte blocks; block 0 is all zero.  The string literal fills
 * the array exactly, so no terminating NUL is stored.
 * NOTE(review): presumably the @xlattable given to UTF8ToISO8859x, which
 * reads index bytes 0..31 by (2-byte lead & 0x1F), bytes 32..47 by
 * (3-byte lead & 0x0F) and each block by (trailing byte & 0x3F), a final 0
 * meaning "not in the character set" -- confirm at the call site.
 */
3084 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3085     "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3086     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3087     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3088     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3089     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3090     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3091     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3092     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3093     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3094     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3095     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3096     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3097     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3098     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3099     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3100     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3101     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3102     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3103     "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3104     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3105     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3106     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3107     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3108 };
3109
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_10ToUTF8.
 * The first 32 entries are 0x0080..0x009f in ascending order.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-10 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
3110 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3111     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3112     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3113     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3114     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3115     0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3116     0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3117     0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3118     0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3119     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3120     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3121     0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3122     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3123     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3124     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3125     0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3126     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3127 };
3128
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_10.
 * The string literals below supply exactly 48 + 7 * 64 bytes, so the array
 * is completely filled and no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte blocks of ISO-8859-10 byte values, with 0x00 presumably
 * meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
3129 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3130     "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3131     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3132     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3133     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3134     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3135     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3136     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3137     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3138     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3139     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3140     "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3141     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3142     "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3143     "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3144     "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3145     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3146     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3147     "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3148     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3149     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3150     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3151     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3153     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3154     "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3155     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3158     "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3159     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3160     "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3161 };
3162
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_11ToUTF8.
 * The first 32 entries are 0x0080..0x009f in ascending order; eight
 * entries are 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-11 byte 0x80 + i and 0x0000 marks an unassigned byte -- confirm
 * against ISO8859xToUTF8.
 */
3163 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3164     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3165     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3166     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3167     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3168     0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3169     0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3170     0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3171     0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3172     0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3173     0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3174     0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3175     0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3176     0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3177     0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3178     0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3179     0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3180 };
3181
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_11.
 * The string literals below supply exactly 48 + 6 * 64 bytes, so the array
 * is completely filled and no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte blocks of ISO-8859-11 byte values, with 0x00 presumably
 * meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
3182 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3183     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185     "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3187     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3191     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3192     "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3196     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3198     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3199     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3200     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3201     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3202     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3204     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3207     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3208     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3209     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3210 };
3211
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_13ToUTF8.
 * The first 32 entries are 0x0080..0x009f in ascending order.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-13 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
3212 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3213     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3214     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3215     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3216     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3217     0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3218     0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3219     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3220     0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3221     0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3222     0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3223     0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3224     0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3225     0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3226     0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3227     0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3228     0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3229 };
3230
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_13.
 * The string literals below supply exactly 48 + 7 * 64 bytes, so the array
 * is completely filled and no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * seven 64-byte blocks of ISO-8859-13 byte values, with 0x00 presumably
 * meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
3231 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3232     "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3235     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3236     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3240     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3241     "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3242     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3243     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3248     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3249     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3250     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3251     "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3252     "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3253     "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3254     "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3255     "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3256     "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3257     "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3258     "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3259     "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3260     "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3261     "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3262     "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3263 };
3264
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_14ToUTF8.
 * The first 32 entries are 0x0080..0x009f in ascending order.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-14 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
3265 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3266     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3267     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3268     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3269     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3270     0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3271     0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3272     0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3273     0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3274     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3275     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3276     0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3277     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3278     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3279     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3280     0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3281     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3282 };
3283
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_14.
 * The string literals below supply exactly 48 + 10 * 64 bytes, so the array
 * is completely filled and no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * ten 64-byte blocks of ISO-8859-14 byte values, with 0x00 presumably
 * meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
3284 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3285     "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3286     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3287     "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3293     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3294     "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3295     "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3300     "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3301     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3302     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3303     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3305     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306     "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308     "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3315     "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319     "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3320     "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321     "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3322     "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3323     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3325     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3326     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3327     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3328 };
3329
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_15ToUTF8.
 * All entries equal 0x0080 + index except eight (0x20ac, 0x0160, 0x0161,
 * 0x017d, 0x017e, 0x0152, 0x0153, 0x0178).
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-15 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
3330 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3331     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3332     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3333     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3334     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3335     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3336     0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3337     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3338     0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3339     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3340     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3341     0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3342     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3343     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3344     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3345     0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3346     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3347 };
3348
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_15.
 * The string literals below supply exactly 48 + 6 * 64 bytes, so the array
 * is completely filled and no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * six 64-byte blocks of ISO-8859-15 byte values, with 0x00 presumably
 * meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
3349 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3350     "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3353     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3354     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3356     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3358     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3359     "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3360     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3361     "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3368     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370     "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372     "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3373     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3374     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3375     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3376     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3377 };
3378
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 16-bit values handed to ISO8859xToUTF8 by ISO8859_16ToUTF8.
 * The first 32 entries are 0x0080..0x009f in ascending order.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-16 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
3379 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3380     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3381     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3382     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3383     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3384     0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3385     0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3386     0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3387     0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3388     0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3389     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3390     0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3391     0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3392     0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3393     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3394     0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3395     0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3396 };
3397
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_16.
 * The string literals below supply exactly 48 + 9 * 64 bytes, so the array
 * is completely filled and no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * nine 64-byte blocks of ISO-8859-16 byte values, with 0x00 presumably
 * meaning "no mapping" -- confirm against UTF8ToISO8859x.
 */
3398 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3399     "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3400     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3405     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3407     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3408     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3409     "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3410     "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3411     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3412     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414     "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415     "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3416     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417     "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3418     "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3425     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3428     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431     "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3432     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434     "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3435     "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3436     "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3437     "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3438 };
3439
3440
3441 /*
3442  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3443  */
3444
3445 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3446     const unsigned char* in, int *inlen) {
3447     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3448 }
3449 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3450     const unsigned char* in, int *inlen) {
3451     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3452 }
3453
3454 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3455     const unsigned char* in, int *inlen) {
3456     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3457 }
3458 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3459     const unsigned char* in, int *inlen) {
3460     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3461 }
3462
3463 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3464     const unsigned char* in, int *inlen) {
3465     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3466 }
3467 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3468     const unsigned char* in, int *inlen) {
3469     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3470 }
3471
3472 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3473     const unsigned char* in, int *inlen) {
3474     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3475 }
3476 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3477     const unsigned char* in, int *inlen) {
3478     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3479 }
3480
3481 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3482     const unsigned char* in, int *inlen) {
3483     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3484 }
3485 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3486     const unsigned char* in, int *inlen) {
3487     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3488 }
3489
3490 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3491     const unsigned char* in, int *inlen) {
3492     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3493 }
3494 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3495     const unsigned char* in, int *inlen) {
3496     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3497 }
3498
3499 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3500     const unsigned char* in, int *inlen) {
3501     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3502 }
3503 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3504     const unsigned char* in, int *inlen) {
3505     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3506 }
3507
3508 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3509     const unsigned char* in, int *inlen) {
3510     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3511 }
3512 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3513     const unsigned char* in, int *inlen) {
3514     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3515 }
3516
3517 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3518     const unsigned char* in, int *inlen) {
3519     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3520 }
3521 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3522     const unsigned char* in, int *inlen) {
3523     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3524 }
3525
3526 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3527     const unsigned char* in, int *inlen) {
3528     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3529 }
3530 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3531     const unsigned char* in, int *inlen) {
3532     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3533 }
3534
3535 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3536     const unsigned char* in, int *inlen) {
3537     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3538 }
3539 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3540     const unsigned char* in, int *inlen) {
3541     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3542 }
3543
3544 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3545     const unsigned char* in, int *inlen) {
3546     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3547 }
3548 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3549     const unsigned char* in, int *inlen) {
3550     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3551 }
3552
3553 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3554     const unsigned char* in, int *inlen) {
3555     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3556 }
3557 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3558     const unsigned char* in, int *inlen) {
3559     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3560 }
3561
3562 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3563     const unsigned char* in, int *inlen) {
3564     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3565 }
3566 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3567     const unsigned char* in, int *inlen) {
3568     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3569 }
3570
3571 static void
3572 xmlRegisterCharEncodingHandlersISO8859x (void) {
3573     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3574     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3575     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3576     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3577     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3578     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3579     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3580     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3581     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3582     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3583     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3584     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3585     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3586     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3587 }
3588
3589 #endif
3590 #endif
3591
3592 #define bottom_encoding
3593 #include "elfgcchack.h"