src/third_party/icu/patches/uconv.patch

   1 Index: source/common/ucnv2022.cpp
   2 ===================================================================
   3 --- source/common/ucnv2022.cpp  (revision 259715)
   4 +++ source/common/ucnv2022.cpp  (working copy)
   5 @@ -154,7 +154,11 @@
   6  } StateEnum;
   7
   8  /* is the StateEnum charset value for a DBCS charset? */
   9 +#if UCONFIG_NO_NON_HTML5_CONVERSION  /* test for JISX208 alone instead of the JISX208..KSC5601 range */
  10 +#define IS_JP_DBCS(cs) (JISX208==(cs))
  11 +#else
  12  #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
  13 +#endif  /* UCONFIG_NO_NON_HTML5_CONVERSION */
  14
  15  #define CSM(cs) ((uint16_t)1<<(cs))
  16
  17 @@ -167,13 +171,23 @@
  18   *   all versions, not just JIS7 and JIS8.
  19   * - ICU does not distinguish between different versions of JIS X 0208.
  20   */
  21 +#if UCONFIG_NO_NON_HTML5_CONVERSION
  22 +enum { MAX_JA_VERSION=0 };  /* jpCharsetMasks[] then holds a single entry, index 0 */
  23 +#else
  24  enum { MAX_JA_VERSION=4 };
  25 +#endif  /* UCONFIG_NO_NON_HTML5_CONVERSION */
  26  static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
  27 +/*
  28 + * TODO(jshin): The encoding spec has JISX212, but we don't support it.
  29 + * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
  30 + */
  31      CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
  32 +#if !UCONFIG_NO_NON_HTML5_CONVERSION  /* masks for versions 1..4 */
  33      CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
  34      CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
  35      CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
  36      CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
  37 +#endif  /* !UCONFIG_NO_NON_HTML5_CONVERSION */
  38  };
  39
  40  typedef enum {
  41 @@ -360,15 +374,18 @@
  42      ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
  43  };
  44
  45 -
  46  /* Type def for refactoring changeState_2022 code*/
  47  typedef enum{
  48  #ifdef U_ENABLE_GENERIC_ISO_2022
  49      ISO_2022=0,
  50  #endif
  51 +#if UCONFIG_NO_NON_HTML5_CONVERSION  /* declare only the JP variant; KR and CN are omitted */
  52 +    ISO_2022_JP=1
  53 +#else
  54      ISO_2022_JP=1,
  55      ISO_2022_KR=2,
  56      ISO_2022_CN=3
  57 +#endif  /* UCONFIG_NO_NON_HTML5_CONVERSION */
  58  } Variant2022;
  59
  60  /*********** ISO 2022 Converter Protos ***********/
  61 @@ -485,12 +502,15 @@
  62                  /* prevent indexing beyond jpCharsetMasks[] */
  63                  myConverterData->version = version = 0;
  64              }
  65 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
  66              if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
  67                  myConverterData->myConverterArray[ISO8859_7] =
  68                      ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
  69              }
  70 +#endif
  71              myConverterData->myConverterArray[JISX208] =
  72                  ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
  73 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
  74              if(jpCharsetMasks[version]&CSM(JISX212)) {
  75                  myConverterData->myConverterArray[JISX212] =
  76                      ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
  77 @@ -503,6 +523,7 @@
  78                  myConverterData->myConverterArray[KSC5601] =
  79                      ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
  80              }
  81 +#endif
  82
  83              /* set the function pointers to appropriate funtions */
  84              cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
  85 @@ -513,6 +534,7 @@
  86              myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
  87              myConverterData->name[len+1]='\0';
  88          }
  89 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
  90          else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
  91              (myLocale[2]=='_' || myLocale[2]=='\0'))
  92          {
  93 @@ -582,6 +604,7 @@
  94                  (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
  95              }
  96          }
  97 +#endif // !UCONFIG_NO_NON_HTML5_CONVERSION
  98          else{
  99  #ifdef U_ENABLE_GENERIC_ISO_2022
 100              myConverterData->isFirstBuffer = TRUE;
 101 @@ -716,6 +739,7 @@
 102      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 103  };
 104
 105 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 106  /*************** to unicode *******************/
 107  static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
 108  /*      0                1               2               3               4               5               6               7               8               9    */
 109 @@ -728,6 +752,7 @@
 110      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 111      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 112  };
 113 +#endif
 114
 115
 116  static UCNV_TableStates_2022
 117 @@ -880,6 +905,7 @@
 118                      }
 119                      break;
 120                  /* case SS3_STATE: not used in ISO-2022-JP-x */
 121 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 122                  case ISO8859_1:
 123                  case ISO8859_7:
 124                      if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
 125 @@ -889,6 +915,7 @@
 126                          myData2022->toU2022State.cs[2]=(int8_t)tempState;
 127                      }
 128                      break;
 129 +#endif
 130                  default:
 131                      if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
 132                          *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 133 @@ -900,6 +927,7 @@
 134                  }
 135              }
 136              break;
 137 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 138          case ISO_2022_CN:
 139              {
 140                  StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
 141 @@ -961,6 +989,7 @@
 142                  *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 143              }
 144              break;
 145 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
 146
 147          default:
 148              *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 149 @@ -1381,12 +1410,16 @@
 150  static const StateEnum jpCharsetPref[]={
 151      ASCII,
 152      JISX201,
 153 +#if !UCONFIG_NO_NON_HTML5_CONVERSION  /* these two are not in the HTML5-only jpCharsetMasks[0] */
 154      ISO8859_1,
 155      ISO8859_7,
 156 +#endif  /* !UCONFIG_NO_NON_HTML5_CONVERSION */
 157      JISX208,
 158 +#if !UCONFIG_NO_NON_HTML5_CONVERSION  /* these three are not in the HTML5-only jpCharsetMasks[0] */
 159      JISX212,
 160      GB2312,
 161      KSC5601,
 162 +#endif  /* !UCONFIG_NO_NON_HTML5_CONVERSION */
 163      HWKANA_7BIT
 164  };
 165
 166 @@ -1756,6 +1789,7 @@
 167                          g = 0;
 168                      }
 169                      break;
 170 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 171                  case ISO8859_1:
 172                      if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
 173                          targetValue = (uint32_t)sourceChar - 0x80;
 174 @@ -1764,6 +1798,7 @@
 175                          g = 2;
 176                      }
 177                      break;
 178 +#endif
 179                  case HWKANA_7BIT:
 180                      if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
 181                          if(converterData->version==3) {
 182 @@ -1825,6 +1860,7 @@
 183                          useFallback = FALSE;
 184                      }
 185                      break;
 186 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 187                  case ISO8859_7:
 188                      /* G0 SBCS forced to 7-bit output */
 189                      len2 = MBCS_SINGLE_FROM_UCHAR32(
 190 @@ -1839,6 +1875,7 @@
 191                          useFallback = FALSE;
 192                      }
 193                      break;
 194 +#endif
 195                  default:
 196                      /* G0 DBCS */
 197                      len2 = MBCS_FROM_UCHAR32_ISO2022(
 198 @@ -1846,6 +1883,7 @@
 199                                  sourceChar, &value,
 200                                  useFallback, MBCS_OUTPUT_2);
 201                      if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
 202 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 203                          if(cs0 == KSC5601) {
 204                              /*
 205                               * Check for valid bytes for the encoding scheme.
 206 @@ -1857,6 +1895,7 @@
 207                                  break;
 208                              }
 209                          }
 210 +#endif
 211                          targetValue = value;
 212                          len = len2;
 213                          cs = cs0;
 214 @@ -2150,6 +2189,7 @@
 215                          targetUniChar = mySourceChar;
 216                      }
 217                      break;
 218 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 219                  case ISO8859_1:
 220                      if(mySourceChar <= 0x7f) {
 221                          targetUniChar = mySourceChar + 0x80;
 222 @@ -2168,6 +2208,7 @@
 223                      /* return from a single-shift state to the previous one */
 224                      pToU2022State->g=pToU2022State->prevG;
 225                      break;
 226 +#endif
 227                  case JISX201:
 228                      if(mySourceChar <= 0x7f) {
 229                          targetUniChar = jisx201ToU(mySourceChar);
 230 @@ -2207,9 +2248,11 @@
 231                              } else {
 232                                  /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
 233                                  mySourceChar = tmpSourceChar;
 234 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 235                                  if (cs == KSC5601) {
 236                                      tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
 237                                  }
 238 +#endif
 239                                  tempBuf[0] = (char)(tmpSourceChar >> 8);
 240                                  tempBuf[1] = (char)(tmpSourceChar);
 241                              }
 242 @@ -2271,6 +2314,7 @@
 243  }
 244
 245
 246 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 247  /***************************************************************
 248  *   Rules for ISO-2022-KR encoding
 249  *   i) The KSC5601 designator sequence should appear only once in a file,
 250 @@ -3414,6 +3458,7 @@
 251      args->target = myTarget;
 252      args->source = mySource;
 253  }
 254 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
 255
 256  static void
 257  _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
 258 @@ -3615,6 +3660,7 @@
 259          /* include JIS X 0201 which is hardcoded */
 260          sa->add(sa->set, 0xa5);
 261          sa->add(sa->set, 0x203e);
 262 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 263          if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
 264              /* include Latin-1 for some variants of JP */
 265              sa->addRange(sa->set, 0, 0xff);
 266 @@ -3622,6 +3668,10 @@
 267              /* include ASCII for JP */
 268              sa->addRange(sa->set, 0, 0x7f);
 269          }
 270 +#else
 271 +        /* include ASCII for JP */
 272 +        sa->addRange(sa->set, 0, 0x7f);
 273 +#endif
 274          if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
 275              /*
 276               * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
 277 @@ -3640,6 +3690,7 @@
 278              sa->addRange(sa->set, HWKANA_START, HWKANA_END);
 279          }
 280          break;
 281 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 282      case 'c':
 283      case 'z':
 284          /* include ASCII for CN */
 285 @@ -3651,6 +3702,7 @@
 286                  cnvData->currentConverter, sa, which, pErrorCode);
 287          /* the loop over myConverterArray[] will simply not find another converter */
 288          break;
 289 +#endif
 290      default:
 291          break;
 292      }
 293 @@ -3671,10 +3723,16 @@
 294      for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
 295          UConverterSetFilter filter;
 296          if(cnvData->myConverterArray[i]!=NULL) {
 297 -            if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
 298 -                cnvData->version==0 && i==CNS_11643
 299 -            ) {
 300 +            if(cnvData->locale[0]=='j' && i==JISX208) {
 301                  /*
 302 +                 * Only add code points that map to Shift-JIS codes
 303 +                 * corresponding to JIS X 0208.
 304 +                 */
 305 +                filter=UCNV_SET_FILTER_SJIS;
 306 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 307 +            } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
 308 +                       cnvData->version==0 && i==CNS_11643) {
 309 +                /*
 310                   * Version-specific for CN:
 311                   * CN version 0 does not map CNS planes 3..7 although
 312                   * they are all available in the CNS conversion table;
 313 @@ -3682,18 +3740,13 @@
 314                   * The two versions create different Unicode sets.
 315                   */
 316                  filter=UCNV_SET_FILTER_2022_CN;
 317 -            } else if(cnvData->locale[0]=='j' && i==JISX208) {
 318 -                /*
 319 -                 * Only add code points that map to Shift-JIS codes
 320 -                 * corresponding to JIS X 0208.
 321 -                 */
 322 -                filter=UCNV_SET_FILTER_SJIS;
 323              } else if(i==KSC5601) {
 324                  /*
 325                   * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
 326                   * are broader than GR94.
 327                   */
 328                  filter=UCNV_SET_FILTER_GR94DBCS;
 329 +#endif
 330              } else {
 331                  filter=UCNV_SET_FILTER_NONE;
 332              }
 333 @@ -3831,6 +3884,7 @@
 334
 335  }  // namespace
 336
 337 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 338  /************* KR ***************/
 339  static const UConverterImpl _ISO2022KRImpl={
 340      UCNV_ISO_2022,
 341 @@ -3947,5 +4001,6 @@
 342  };
 343
 344  }  // namespace
 345 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
 346
 347  #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
 348 Index: source/common/ucnvbocu.cpp
 349 ===================================================================
 350 --- source/common/ucnvbocu.cpp  (revision 259715)
 351 +++ source/common/ucnvbocu.cpp  (working copy)
 352 @@ -19,7 +19,7 @@
 353
 354  #include "unicode/utypes.h"
 355
 356 -#if !UCONFIG_NO_CONVERSION
 357 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
 358
 359  #include "unicode/ucnv.h"
 360  #include "unicode/ucnv_cb.h"
 361 Index: source/common/ucnvisci.c
 362 ===================================================================
 363 --- source/common/ucnvisci.c    (revision 259715)
 364 +++ source/common/ucnvisci.c    (working copy)
 365 @@ -17,7 +17,7 @@
 366
 367  #include "unicode/utypes.h"
 368
 369 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
 370 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
 371
 372  #include "unicode/ucnv.h"
 373  #include "unicode/ucnv_cb.h"
 374 Index: source/common/ucnvscsu.c
 375 ===================================================================
 376 --- source/common/ucnvscsu.c    (revision 259715)
 377 +++ source/common/ucnvscsu.c    (working copy)
 378 @@ -21,7 +21,7 @@
 379
 380  #include "unicode/utypes.h"
 381
 382 -#if !UCONFIG_NO_CONVERSION
 383 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
 384
 385  #include "unicode/ucnv.h"
 386  #include "unicode/ucnv_cb.h"
 387 Index: source/common/ucnv_u7.c
 388 ===================================================================
 389 --- source/common/ucnv_u7.c     (revision 259715)
 390 +++ source/common/ucnv_u7.c     (working copy)
 391 @@ -16,7 +16,7 @@
 392
 393  #include "unicode/utypes.h"
 394
 395 -#if !UCONFIG_NO_CONVERSION
 396 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
 397
 398  #include "unicode/ucnv.h"
 399  #include "ucnv_bld.h"
 400 Index: source/common/unicode/uconfig.h
 401 ===================================================================
 402 --- source/common/unicode/uconfig.h     (revision 259715)
 403 +++ source/common/unicode/uconfig.h     (working copy)
 404 @@ -265,6 +265,14 @@
 405  #endif
 406
 407  /**
 408 + * \def UCONFIG_NO_NON_HTML5_CONVERSION
 409 + * Turns off all converters NOT in the Encoding Standard: https://encoding.spec.whatwg.org/
 410 + */
 411 +#ifndef UCONFIG_NO_NON_HTML5_CONVERSION
 412 +#define UCONFIG_NO_NON_HTML5_CONVERSION 0
 413 +#endif
 414 +
 415 +/**
 416   * \def UCONFIG_NO_LEGACY_CONVERSION
 417   * This switch turns off all converters except for
 418   * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
 419 Index: source/common/ucnv_bld.cpp
 420 ===================================================================
 421 --- source/common/ucnv_bld.cpp  (revision 259715)
 422 +++ source/common/ucnv_bld.cpp  (working copy)
 423 @@ -69,28 +69,41 @@
 424
 425  #if UCONFIG_NO_LEGACY_CONVERSION
 426      NULL,
 427 +#else
 428 +    &_ISO2022Data,
 429 +#endif
 430 +
 431 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION
 432      NULL, NULL, NULL, NULL, NULL, NULL,
 433      NULL, NULL, NULL, NULL, NULL, NULL,
 434      NULL,
 435  #else
 436 -    &_ISO2022Data,
 437      &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
 438      &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
 439      &_HZData,
 440  #endif
 441
 442 +#if UCONFIG_NO_NON_HTML5_CONVERSION
 443 +    NULL,
 444 +#else
 445      &_SCSUData,
 446 +#endif
 447
 448 -#if UCONFIG_NO_LEGACY_CONVERSION
 449 +
 450 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION
 451      NULL,
 452  #else
 453      &_ISCIIData,
 454  #endif
 455
 456      &_ASCIIData,
 457 +#if UCONFIG_NO_NON_HTML5_CONVERSION
 458 +    NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL,
 459 +#else
 460      &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
 461 +#endif
 462
 463 -#if UCONFIG_NO_LEGACY_CONVERSION
 464 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION
 465      NULL,
 466  #else
 467      &_CompoundTextData
 468 @@ -105,18 +118,24 @@
 469    const char *name;
 470    const UConverterType type;
 471  } const cnvNameType[] = {
 472 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 473    { "bocu1", UCNV_BOCU1 },
 474    { "cesu8", UCNV_CESU8 },
 475 -#if !UCONFIG_NO_LEGACY_CONVERSION
 476 +#endif
 477 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
 478    { "hz",UCNV_HZ },
 479  #endif
 480 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 481    { "imapmailboxname", UCNV_IMAP_MAILBOX },
 482 +#endif
 483 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
 484 +  { "iscii", UCNV_ISCII },
 485 +#endif
 486  #if !UCONFIG_NO_LEGACY_CONVERSION
 487 -  { "iscii", UCNV_ISCII },
 488    { "iso2022", UCNV_ISO_2022 },
 489  #endif
 490    { "iso88591", UCNV_LATIN_1 },
 491 -#if !UCONFIG_NO_LEGACY_CONVERSION
 492 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
 493    { "lmbcs1", UCNV_LMBCS_1 },
 494    { "lmbcs11",UCNV_LMBCS_11 },
 495    { "lmbcs16",UCNV_LMBCS_16 },
 496 @@ -130,7 +149,9 @@
 497    { "lmbcs6", UCNV_LMBCS_6 },
 498    { "lmbcs8", UCNV_LMBCS_8 },
 499  #endif
 500 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 501    { "scsu", UCNV_SCSU },
 502 +#endif
 503    { "usascii", UCNV_US_ASCII },
 504    { "utf16", UCNV_UTF16 },
 505    { "utf16be", UCNV_UTF16_BigEndian },
 506 @@ -152,9 +173,13 @@
 507    { "utf32oppositeendian", UCNV_UTF32_BigEndian },
 508    { "utf32platformendian", UCNV_UTF32_LittleEndian },
 509  #endif
 510 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 511    { "utf7", UCNV_UTF7 },
 512 +#endif
 513    { "utf8", UCNV_UTF8 },
 514 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 515    { "x11compoundtext", UCNV_COMPOUND_TEXT}
 516 +#endif
 517  };
 518
 519
 520 Index: source/common/ucnv_u8.c
 521 ===================================================================
 522 --- source/common/ucnv_u8.c     (revision 259715)
 523 +++ source/common/ucnv_u8.c     (working copy)
 524 @@ -87,6 +87,15 @@
 525  static const uint32_t
 526  utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
 527
 528 +static UBool hasCESU8Data(const UConverter *cnv)  /* is cnv's sharedData the _CESU8Data object? */
 529 +{
 530 +#if UCONFIG_NO_NON_HTML5_CONVERSION
 531 +    return FALSE;  /* always FALSE in this configuration; _CESU8Data is not referenced */
 532 +#else
 533 +    return (UBool)(cnv->sharedData == &_CESU8Data);
 534 +#endif  /* UCONFIG_NO_NON_HTML5_CONVERSION */
 535 +}
 536 +
 537  static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
 538                                    UErrorCode * err)
 539  {
 540 @@ -96,10 +105,10 @@
 541      const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
 542      const UChar *targetLimit = args->targetLimit;
 543      unsigned char *toUBytes = cnv->toUBytes;
 544 -    UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
 545 +    UBool isCESU8 = hasCESU8Data(cnv);
 546      uint32_t ch, ch2 = 0;
 547      int32_t i, inBytes;
 548 -
 549 +
 550      /* Restore size of current sequence */
 551      if (cnv->toUnicodeStatus && myTarget < targetLimit)
 552      {
 553 @@ -226,7 +235,7 @@
 554      const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
 555      const UChar *targetLimit = args->targetLimit;
 556      unsigned char *toUBytes = cnv->toUBytes;
 557 -    UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
 558 +    UBool isCESU8 = hasCESU8Data(cnv);
 559      uint32_t ch, ch2 = 0;
 560      int32_t i, inBytes;
 561
 562 @@ -357,7 +366,7 @@
 563      UChar32 ch;
 564      uint8_t tempBuf[4];
 565      int32_t indexToWrite;
 566 -    UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
 567 +    UBool isNotCESU8 = !hasCESU8Data(cnv);
 568
 569      if (cnv->fromUChar32 && myTarget < targetLimit)
 570      {
 571 @@ -473,7 +482,7 @@
 572      int32_t offsetNum, nextSourceIndex;
 573      int32_t indexToWrite;
 574      uint8_t tempBuf[4];
 575 -    UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
 576 +    UBool isNotCESU8 = !hasCESU8Data(cnv);
 577
 578      if (cnv->fromUChar32 && myTarget < targetLimit)
 579      {
 580 Index: source/common/unicode/urename.h
 581 ===================================================================
 582 --- source/common/unicode/urename.h     (revision 259715)
 583 +++ source/common/unicode/urename.h     (working copy)
 584 @@ -73,12 +73,14 @@
 585  #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData)
 586  #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign)
 587  #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData)
 588 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 589  #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data)
 590  #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data)
 591  #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData)
 592  #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData)
 593  #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData)
 594  #define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData)
 595 +#endif
 596  #define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data)
 597  #define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1)
 598  #define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11)
 599 @@ -94,14 +96,18 @@
 600  #define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8)
 601  #define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data)
 602  #define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData)
 603 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 604  #define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData)
 605 +#endif
 606  #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData)
 607  #define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data)
 608  #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData)
 609  #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData)
 610  #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data)
 611  #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData)
 612 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 613  #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
 614 +#endif
 615  #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
 616  #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
 617  #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse)
 618 Index: source/common/ucnv_cnv.h
 619 ===================================================================
 620 --- source/common/ucnv_cnv.h    (revision 259715)
 621 +++ source/common/ucnv_cnv.h    (working copy)
 622 @@ -256,11 +256,15 @@
 623  extern const UConverterSharedData
 624      _MBCSData, _Latin1Data,
 625      _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,
 626 -    _ISO2022Data,
 627 +    _ISO2022Data,
 628 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 629      _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
 630      _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
 631      _HZData,_ISCIIData, _SCSUData, _ASCIIData,
 632      _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData;
 633 +#else
 634 +    _ASCIIData, _UTF16Data, _UTF32Data;
 635 +#endif
 636
 637  U_CDECL_END
 638
 639 Index: source/common/ucnv_lmb.c
 640 ===================================================================
 641 --- source/common/ucnv_lmb.c    (revision 291619)
 642 +++ source/common/ucnv_lmb.c    (working copy)
 643 @@ -25,7 +25,7 @@
 644
 645  #include "unicode/utypes.h"
 646
 647 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
 648 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
 649
 650  #include "unicode/ucnv_err.h"
 651  #include "unicode/ucnv.h"
 652 Index: source/common/ucnvhz.c
 653 ===================================================================
 654 --- source/common/ucnvhz.c      (revision 291619)
 655 +++ source/common/ucnvhz.c      (working copy)
 656 @@ -16,7 +16,7 @@
 657
 658  #include "unicode/utypes.h"
 659
 660 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
 661 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
 662
 663  #include "cmemory.h"
 664  #include "unicode/ucnv.h"
 665 @@ -637,4 +637,4 @@
 666          0
 667  };
 668
 669 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
 670 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION */
 671 Index: source/common/ucnv_ct.c
 672 ===================================================================
 673 --- source/common/ucnv_ct.c     (revision 291619)
 674 +++ source/common/ucnv_ct.c     (working copy)
 675 @@ -14,7 +14,7 @@
 676
 677  #include "unicode/utypes.h"
 678
 679 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
 680 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
 681
 682  #include "unicode/ucnv.h"
 683  #include "unicode/uset.h"
 684 Index: source/i18n/csrsbcs.h
 685 ===================================================================
 686 --- source/i18n/csrsbcs.h       (revision 291619)
 687 +++ source/i18n/csrsbcs.h       (working copy)
 688 @@ -50,6 +50,7 @@
 689
 690  };
 691
 692 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 693  class NGramParser_IBM420 : public NGramParser
 694  {
 695  private:
 696 @@ -61,6 +62,7 @@
 697  public:
 698      NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
 699  };
 700 +#endif
 701
 702
 703  class CharsetRecog_sbcs : public CharsetRecognizer
 704 @@ -229,6 +231,7 @@
 705      virtual UBool match(InputText *det, CharsetMatch *results) const;
 706  };
 707
 708 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 709  class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
 710  {
 711  public:
 712 @@ -280,6 +283,7 @@
 713
 714      virtual UBool match(InputText *det, CharsetMatch *results) const;
 715  };
 716 +#endif
 717
 718  U_NAMESPACE_END
 719
 720 Index: source/i18n/csr2022.h
 721 ===================================================================
 722 --- source/i18n/csr2022.h       (revision 291619)
 723 +++ source/i18n/csr2022.h       (working copy)
 724 @@ -65,6 +65,7 @@
 725      UBool match(InputText *textIn, CharsetMatch *results) const;
 726  };
 727
 728 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 729  class CharsetRecog_2022KR :public CharsetRecog_2022 {
 730  public:
 731      virtual ~CharsetRecog_2022KR();
 732 @@ -84,6 +85,7 @@
 733
 734      UBool match(InputText *textIn, CharsetMatch *results) const;
 735  };
 736 +#endif
 737
 738  U_NAMESPACE_END
 739
 740 Index: source/i18n/csr2022.cpp
 741 ===================================================================
 742 --- source/i18n/csr2022.cpp     (revision 291619)
 743 +++ source/i18n/csr2022.cpp     (working copy)
 744 @@ -119,6 +119,7 @@
 745      {0x1b, 0x2e, 0x46, 0x00, 0x00}    // ISO 8859-7
 746  };
 747
 748 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 749  static const uint8_t escapeSequences_2022KR[][5] = {
 750      {0x1b, 0x24, 0x29, 0x43, 0x00}
 751  };
 752 @@ -136,6 +137,7 @@
 753      {0x1b, 0x4e, 0x00, 0x00, 0x00},   // SS2
 754      {0x1b, 0x4f, 0x00, 0x00, 0x00},   // SS3
 755  };
 756 +#endif
 757
 758  CharsetRecog_2022JP::~CharsetRecog_2022JP() {}
 759
 760 @@ -152,6 +154,7 @@
 761      return (confidence > 0);
 762  }
 763
 764 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 765  CharsetRecog_2022KR::~CharsetRecog_2022KR() {}
 766
 767  const char *CharsetRecog_2022KR::getName() const {
 768 @@ -181,6 +184,7 @@
 769      results->set(textIn, this, confidence);
 770      return (confidence > 0);
 771  }
 772 +#endif
 773
 774  CharsetRecog_2022::~CharsetRecog_2022() {
 775      // nothing to do
 776 Index: source/i18n/csdetect.cpp
 777 ===================================================================
 778 --- source/i18n/csdetect.cpp    (revision 291619)
 779 +++ source/i18n/csdetect.cpp    (working copy)
 780 @@ -110,6 +110,7 @@
 781          new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
 782
 783          new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
 784 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 785          new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
 786          new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
 787
 788 @@ -117,6 +118,7 @@
 789          new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
 790          new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
 791          new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
 792 +#endif
 793      };
 794      int32_t rCount = ARRAY_SIZE(tempArray);
 795
 796 Index: source/i18n/csrsbcs.cpp
 797 ===================================================================
 798 --- source/i18n/csrsbcs.cpp     (revision 291619)
 799 +++ source/i18n/csrsbcs.cpp     (working copy)
 800 @@ -137,6 +137,7 @@
 801      return (int32_t) (rawPercent * 300.0);
 802  }
 803
 804 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 805  static const uint8_t unshapeMap_IBM420[] = {
 806  /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
 807  /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
 808 @@ -232,6 +233,7 @@
 809          }
 810      }
 811  }
 812 +#endif
 813
 814  CharsetRecog_sbcs::CharsetRecog_sbcs()
 815  {
 816 @@ -624,6 +626,7 @@
 817      0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
 818  };
 819
 820 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 821  static const int32_t ngrams_IBM424_he_rtl[] = {
 822      0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
 823      0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
 824 @@ -691,6 +694,7 @@
 825  /* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
 826  /* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
 827  };
 828 +#endif
 829
 830  //ISO-8859-1,2,5,6,7,8,9 Ngrams
 831
 832 @@ -1155,6 +1159,7 @@
 833      return (confidence > 0);
 834  }
 835
 836 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
 837  CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
 838  {
 839      // nothing to do
 840 @@ -1253,6 +1258,7 @@
 841      results->set(textIn, this, confidence);
 842      return (confidence > 0);
 843  }
 844 +#endif
 845
 846  U_NAMESPACE_END
 847  #endif