- add sources.
[platform/framework/web/crosswalk.git] / src / third_party / libxml / patches / icu
1 Add code support for ICU.
2
3 diff --git a/third_party/libxml/encoding.c b/third_party/libxml/encoding.c
4 index b86a547..0f41df9 100644
5 --- a/third_party/libxml/encoding.c
6 +++ b/third_party/libxml/encoding.c
7 @@ -58,7 +58,7 @@ static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
8  static int xmlCharEncodingAliasesNb = 0;
9  static int xmlCharEncodingAliasesMax = 0;
10  
11 -#ifdef LIBXML_ICONV_ENABLED
12 +#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
13  #if 0
14  #define DEBUG_ENCODING  /* Define this to get encoding traces */
15  #endif
16 @@ -97,6 +97,54 @@ xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
17                      NULL, 0, val, NULL, NULL, 0, 0, msg, val);
18  }
19  
20 +#ifdef LIBXML_ICU_ENABLED
21 +static uconv_t* 
22 +openIcuConverter(const char* name, int toUnicode)
23 +{
24 +  UErrorCode status = U_ZERO_ERROR;
25 +  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
26 +  if (conv == NULL)
27 +    return NULL;
28 +
29 +  conv->uconv = ucnv_open(name, &status);
30 +  if (U_FAILURE(status))
31 +    goto error;
32 +
33 +  status = U_ZERO_ERROR;
34 +  if (toUnicode) {
35 +    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, 
36 +                        NULL, NULL, NULL, &status);
37 +  }
38 +  else {
39 +    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, 
40 +                        NULL, NULL, NULL, &status);
41 +  }
42 +  if (U_FAILURE(status))
43 +    goto error;
44 +
45 +  status = U_ZERO_ERROR;
46 +  conv->utf8 = ucnv_open("UTF-8", &status);
47 +  if (U_SUCCESS(status))
48 +    return conv;
49 +
50 +error:
51 +  if (conv->uconv) 
52 +    ucnv_close(conv->uconv);
53 +  xmlFree(conv);
54 +  return NULL;
55 +}
56 +
57 +static void
58 +closeIcuConverter(uconv_t *conv)
59 +{
60 +  if (conv != NULL) {
61 +    ucnv_close(conv->uconv);
62 +    ucnv_close(conv->utf8);
63 +    xmlFree(conv);
64 +  }
65 +}
66 +#endif /* LIBXML_ICU_ENABLED */
67 +
68  /************************************************************************
69   *                                                                     *
70   *             Conversions To/From UTF8 encoding                       *
71 @@ -1306,7 +1354,11 @@ xmlNewCharEncodingHandler(const char *name,
72  #ifdef LIBXML_ICONV_ENABLED
73      handler->iconv_in = NULL;
74      handler->iconv_out = NULL;
75 -#endif /* LIBXML_ICONV_ENABLED */
76 +#endif
77 +#ifdef LIBXML_ICU_ENABLED
78 +    handler->uconv_in = NULL;
79 +    handler->uconv_out = NULL;
80 +#endif
81  
82      /*
83       * registers and returns the handler.
84 @@ -1371,7 +1423,7 @@ xmlInitCharEncodingHandlers(void) {
85      xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
86      xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
87  #endif /* LIBXML_OUTPUT_ENABLED */
88 -#ifndef LIBXML_ICONV_ENABLED
89 +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
90  #ifdef LIBXML_ISO8859X_ENABLED
91      xmlRegisterCharEncodingHandlersISO8859x ();
92  #endif
93 @@ -1578,6 +1630,10 @@ xmlFindCharEncodingHandler(const char *name) {
94      xmlCharEncodingHandlerPtr enc;
95      iconv_t icv_in, icv_out;
96  #endif /* LIBXML_ICONV_ENABLED */
97 +#ifdef LIBXML_ICU_ENABLED
98 +    xmlCharEncodingHandlerPtr enc;
99 +    uconv_t *ucv_in, *ucv_out;
100 +#endif /* LIBXML_ICU_ENABLED */
101      char upper[100];
102      int i;
103  
104 @@ -1647,6 +1703,35 @@ xmlFindCharEncodingHandler(const char *name) {
105                     "iconv : problems with filters for '%s'\n", name);
106      }
107  #endif /* LIBXML_ICONV_ENABLED */
108 +#ifdef LIBXML_ICU_ENABLED
109 +    /* check whether icu can handle this */
110 +    ucv_in = openIcuConverter(name, 1);
111 +    ucv_out = openIcuConverter(name, 0);
112 +    if (ucv_in != NULL && ucv_out != NULL) {
113 +           enc = (xmlCharEncodingHandlerPtr)
114 +                 xmlMalloc(sizeof(xmlCharEncodingHandler));
115 +           if (enc == NULL) {
116 +                closeIcuConverter(ucv_in);
117 +                closeIcuConverter(ucv_out);
118 +               return(NULL);
119 +           }
120 +           enc->name = xmlMemStrdup(name);
121 +           enc->input = NULL;
122 +           enc->output = NULL;
123 +           enc->uconv_in = ucv_in;
124 +           enc->uconv_out = ucv_out;
125 +#ifdef DEBUG_ENCODING
126 +            xmlGenericError(xmlGenericErrorContext,
127 +                   "Found ICU converter handler for encoding %s\n", name);
128 +#endif
129 +           return enc;
130 +    } else if (ucv_in != NULL || ucv_out != NULL) {
131 +            closeIcuConverter(ucv_in);
132 +            closeIcuConverter(ucv_out);
133 +           xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
134 +                   "ICU converter : problems with filters for '%s'\n", name);
135 +    }
136 +#endif /* LIBXML_ICU_ENABLED */
137  
138  #ifdef DEBUG_ENCODING
139      xmlGenericError(xmlGenericErrorContext,
140 @@ -1737,6 +1822,75 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
141  
142  /************************************************************************
143   *                                                                     *
144 + *             ICU based generic conversion functions                  *
145 + *                                                                     *
146 + ************************************************************************/
147 +
148 +#ifdef LIBXML_ICU_ENABLED
149 +/**
150 + * xmlUconvWrapper:
151 + * @cd: ICU uconverter data structure
152 + * @toUnicode: non-zero to convert from the encoding to UTF-8, 0 for UTF-8 to the encoding.
153 + * @out:  a pointer to an array of bytes to store the result
154 + * @outlen:  the length of @out
155 + * @in:  a pointer to an array of input bytes (in the source encoding, or UTF-8 when @toUnicode is 0)
156 + * @inlen:  the length of @in
157 + *
158 + * Returns 0 on success, or
159 + *     -1 for lack of output space (or a NULL argument), or
160 + *     -2 if the transcoding fails (*in is not valid in its encoding, or
161 + *        the result of the transformation can't fit into the target encoding), or
162 + *     -3 for any other error, e.g. the last bytes can't form a complete char.
163 + *     
164 + * The value of @inlen after return is the number of octets consumed
165 + *     from @in (not updated when an argument is NULL).
166 + * The value of @outlen after return is the number of octets produced.
167 + */
168 +static int
169 +xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
170 +                const unsigned char *in, int *inlen) {
171 +    const char *ucv_in = (const char *) in;
172 +    char *ucv_out = (char *) out;
173 +    UErrorCode err = U_ZERO_ERROR;
174 +
175 +    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
176 +        if (outlen != NULL) *outlen = 0;
177 +        return(-1);
178 +    }
179 +
180 +    /* 
181 +     * TODO(jungshik)
182 +     * 1. is ucnv_convert(To|From)Algorithmic better?
183 +     * 2. had we better use an explicit pivot buffer?
184 +     * 3. error returned comes from 'fromUnicode' only even
185 +     *    when toUnicode is true !
186 +     */
187 +    if (toUnicode) {
188 +        /* encoding => UTF-16 => UTF-8 */
189 +        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
190 +                       &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
191 +                       0, TRUE, &err);
192 +    } else {
193 +        /* UTF-8 => UTF-16 => encoding */
194 +        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
195 +                       &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
196 +                       0, TRUE, &err);
197 +    }
198 +    *inlen = ucv_in - (const char*) in; 
199 +    *outlen = ucv_out - (char *) out;
200 +    if (U_SUCCESS(err))
201 +        return 0;
202 +    if (err == U_BUFFER_OVERFLOW_ERROR)
203 +        return -1;
204 +    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
205 +        return -2;
206 +    /* if (err == U_TRUNCATED_CHAR_FOUND) */
207 +    return -3;
208 +}
209 +#endif /* LIBXML_ICU_ENABLED */
210 +
211 +/************************************************************************
212 + *                                                                     *
213   *             The real API used by libxml for on-the-fly conversion   *
214   *                                                                     *
215   ************************************************************************/
216 @@ -1810,6 +1964,16 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
217         if (ret == -1) ret = -3;
218      }
219  #endif /* LIBXML_ICONV_ENABLED */
220 +#ifdef LIBXML_ICU_ENABLED
221 +    else if (handler->uconv_in != NULL) {
222 +       ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
223 +                             &written, in->content, &toconv);
224 +       xmlBufferShrink(in, toconv);
225 +       out->use += written;
226 +       out->content[out->use] = 0;
227 +       if (ret == -1) ret = -3;
228 +    }
229 +#endif /* LIBXML_ICU_ENABLED */
230  #ifdef DEBUG_ENCODING
231      switch (ret) {
232          case 0:
233 @@ -1915,6 +2079,17 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
234              ret = -3;
235      }
236  #endif /* LIBXML_ICONV_ENABLED */
237 +#ifdef LIBXML_ICU_ENABLED
238 +    else if (handler->uconv_in != NULL) {
239 +        ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
240 +                              &written, in->content, &toconv);
241 +        xmlBufferShrink(in, toconv);
242 +        out->use += written;
243 +        out->content[out->use] = 0;
244 +        if (ret == -1)
245 +            ret = -3;
246 +    }
247 +#endif /* LIBXML_ICU_ENABLED */
248      switch (ret) {
249          case 0:
250  #ifdef DEBUG_ENCODING
251 @@ -2015,6 +2190,15 @@ retry:
252             out->content[out->use] = 0;
253         }
254  #endif /* LIBXML_ICONV_ENABLED */
255 +#ifdef LIBXML_ICU_ENABLED
256 +       else if (handler->uconv_out != NULL) {
257 +           ret = xmlUconvWrapper(handler->uconv_out, 0,
258 +                              &out->content[out->use],
259 +                                             &written, NULL, &toconv);
260 +           out->use += written;
261 +           out->content[out->use] = 0;
262 +       }
263 +#endif /* LIBXML_ICU_ENABLED */
264  #ifdef DEBUG_ENCODING
265         xmlGenericError(xmlGenericErrorContext,
266                 "initialized encoder\n");
267 @@ -2061,6 +2245,26 @@ retry:
268         }
269      }
270  #endif /* LIBXML_ICONV_ENABLED */
271 +#ifdef LIBXML_ICU_ENABLED
272 +    else if (handler->uconv_out != NULL) {
273 +       ret = xmlUconvWrapper(handler->uconv_out, 0,
274 +                              &out->content[out->use],
275 +                             &written, in->content, &toconv);
276 +       xmlBufferShrink(in, toconv);
277 +       out->use += written;
278 +       writtentot += written;
279 +       out->content[out->use] = 0;
280 +       if (ret == -1) {
281 +           if (written > 0) {
282 +               /*
283 +                * Ran out of output space after writing some bytes; try again
284 +                */
285 +               goto retry;
286 +           }
287 +           ret = -3;
288 +       }
289 +    }
290 +#endif /* LIBXML_ICU_ENABLED */
291      else {
292         xmlEncodingErr(XML_I18N_NO_OUTPUT,
293                        "xmlCharEncOutFunc: no output function !\n", NULL);
294 @@ -2173,6 +2377,22 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
295         xmlFree(handler);
296      }
297  #endif /* LIBXML_ICONV_ENABLED */
298 +#ifdef LIBXML_ICU_ENABLED
299 +    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
300 +       if (handler->name != NULL)
301 +           xmlFree(handler->name);
302 +       handler->name = NULL;
303 +       if (handler->uconv_out != NULL) {
304 +           closeIcuConverter(handler->uconv_out);
305 +           handler->uconv_out = NULL;
306 +       }
307 +       if (handler->uconv_in != NULL) {
308 +           closeIcuConverter(handler->uconv_in);
309 +           handler->uconv_in = NULL;
310 +       }
311 +       xmlFree(handler);
312 +    }
313 +#endif
314  #ifdef DEBUG_ENCODING
315      if (ret)
316          xmlGenericError(xmlGenericErrorContext,
317 @@ -2248,6 +2468,22 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
318                     cur += toconv;
319                 } while (ret == -2);
320  #endif
321 +#ifdef LIBXML_ICU_ENABLED
322 +           } else if (handler->uconv_out != NULL) {
323 +               do {
324 +                   toconv = in->end - cur;
325 +                   written = 32000;
326 +                   ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
327 +                             &written, cur, &toconv);
328 +                   if (ret < 0) {
329 +                       if (written > 0)
330 +                           ret = -2;
331 +                       else
332 +                           return(-1);
333 +                   }
334 +                   unused += written;
335 +                   cur += toconv;
336 +               } while (ret == -2);
337              } else {
338                 /* could not find a converter */
339                 return(-1);
340 @@ -2259,8 +2495,9 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
341      }
342      return(in->consumed + (in->cur - in->base));
343  }
344 +#endif
345  
346 -#ifndef LIBXML_ICONV_ENABLED
347 +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
348  #ifdef LIBXML_ISO8859X_ENABLED
349  
350  /**
351 diff --git a/third_party/libxml/include/libxml/encoding.h b/third_party/libxml/include/libxml/encoding.h
352 index c74b25f..b5f8b48 100644
353 --- a/third_party/libxml/include/libxml/encoding.h
354 +++ b/third_party/libxml/include/libxml/encoding.h
355 @@ -26,6 +26,24 @@
356  
357  #ifdef LIBXML_ICONV_ENABLED
358  #include <iconv.h>
359 +#else 
360 +#ifdef LIBXML_ICU_ENABLED
361 +#include <unicode/ucnv.h>
362 +#if 0
363 +/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h>
364 + * to prevent unwanted ICU symbols being exposed to users of libxml2.
365 + * One particular case is Qt4 conflicting on UChar32.
366 + */
367 +#include <stdint.h>
368 +struct UConverter;
369 +typedef struct UConverter UConverter;
370 +#ifdef _MSC_VER
371 +typedef wchar_t UChar;
372 +#else
373 +typedef uint16_t UChar;
374 +#endif
375 +#endif
376 +#endif
377  #endif
378  #ifdef __cplusplus
379  extern "C" {
380 @@ -125,6 +143,13 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
381   * Block defining the handlers for non UTF-8 encodings.
382   * If iconv is supported, there are two extra fields.
383   */
384 +#ifdef LIBXML_ICU_ENABLED
385 +struct _uconv_t {
386 +  UConverter *uconv; /* for conversion between an encoding and UTF-16 */
387 +  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
388 +};
389 +typedef struct _uconv_t uconv_t;
390 +#endif
391  
392  typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
393  typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
394 @@ -136,6 +161,10 @@ struct _xmlCharEncodingHandler {
395      iconv_t                    iconv_in;
396      iconv_t                    iconv_out;
397  #endif /* LIBXML_ICONV_ENABLED */
398 +#ifdef LIBXML_ICU_ENABLED
399 +    uconv_t                    *uconv_in;
400 +    uconv_t                    *uconv_out;
401 +#endif /* LIBXML_ICU_ENABLED */
402  };
403  
404  #ifdef __cplusplus
405 diff --git a/third_party/libxml/include/libxml/parser.h b/third_party/libxml/include/libxml/parser.h
406 index dd79c42..3580b63 100644
407 --- a/third_party/libxml/include/libxml/parser.h
408 +++ b/third_party/libxml/include/libxml/parser.h
409 @@ -1222,6 +1222,7 @@ typedef enum {
410      XML_WITH_DEBUG_MEM = 29,
411      XML_WITH_DEBUG_RUN = 30,
412      XML_WITH_ZLIB = 31,
413 +    XML_WITH_ICU = 32,
414      XML_WITH_NONE = 99999 /* just to be sure of allocation size */
415  } xmlFeature;
416  
417 diff --git a/third_party/libxml/include/libxml/xmlversion.h.in b/third_party/libxml/include/libxml/xmlversion.h.in
418 index 4739f3a..de310ab 100644
419 --- a/third_party/libxml/include/libxml/xmlversion.h.in
420 +++ b/third_party/libxml/include/libxml/xmlversion.h.in
421 @@ -269,6 +269,15 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
422  #endif
423  
424  /**
425 + * LIBXML_ICU_ENABLED:
426 + *
427 + * Whether ICU support is available
428 + */
429 +#if @WITH_ICU@
430 +#define LIBXML_ICU_ENABLED
431 +#endif
432 +
433 +/**
434   * LIBXML_ISO8859X_ENABLED:
435   *
436   * Whether ISO-8859-* support is made available in case iconv is not
437 diff --git a/third_party/libxml/parser.c b/third_party/libxml/parser.c
438 index 85e7599..3ba2a06 100644
439 --- a/third_party/libxml/parser.c
440 +++ b/third_party/libxml/parser.c
441 @@ -954,6 +954,12 @@ xmlHasFeature(xmlFeature feature)
442  #else
443              return(0);
444  #endif
445 +        case XML_WITH_ICU:
446 +#ifdef LIBXML_ICU_ENABLED
447 +            return(1);
448 +#else
449 +            return(0);
450 +#endif
451          default:
452             break;
453       }