Update To 11.40.268.0
[platform/framework/web/crosswalk.git] / src / third_party / icu / source / common / ucnv2022.cpp
index b0b2738..8158363 100644 (file)
@@ -154,7 +154,11 @@ typedef enum  {
 } StateEnum;
 
 /* is the StateEnum charset value for a DBCS charset? */
+#if UCONFIG_NO_NON_HTML5_CONVERSION
+#define IS_JP_DBCS(cs) (JISX208==(cs))
+#else
 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
+#endif
 
 #define CSM(cs) ((uint16_t)1<<(cs))
 
@@ -173,6 +177,10 @@ enum { MAX_JA_VERSION=0 };
 enum { MAX_JA_VERSION=4 };
 #endif
 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
+/*
+ * TODO(jshin): The encoding spec has JISX212 for ISO-2022-JP, but we don't support it.
+ * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
+ */
     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
 #if !UCONFIG_NO_NON_HTML5_CONVERSION
     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
@@ -366,23 +374,15 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
 };
 
-
-/* Enable ISO-2022-{KR,CN,CN-Ext} for now.
- * TODO(jshin): Disable it when we know what to do about 'replacement'
- * encodings. See http://crbug.com/277037 and
- * https://codereview.chromium.org/145973021/
- */
-#ifndef U_ENABLE_ISO_2022_KR_CN
-#define U_ENABLE_ISO_2022_KR_CN 1
-#endif
-
 /* Type def for refactoring changeState_2022 code*/
 typedef enum{
 #ifdef U_ENABLE_GENERIC_ISO_2022
     ISO_2022=0,
 #endif
+#if UCONFIG_NO_NON_HTML5_CONVERSION
+    ISO_2022_JP=1
+#else
     ISO_2022_JP=1,
-#ifdef U_ENABLE_ISO_2022_KR_CN
     ISO_2022_KR=2,
     ISO_2022_CN=3
 #endif
@@ -517,7 +517,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
             }
             if(jpCharsetMasks[version]&CSM(GB2312)) {
                 myConverterData->myConverterArray[GB2312] =
-                    ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);   /* gb_2312_80-1 */
+                    ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);   /* gb_2312_80-1 */
             }
             if(jpCharsetMasks[version]&CSM(KSC5601)) {
                 myConverterData->myConverterArray[KSC5601] =
@@ -534,7 +534,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
             myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
             myConverterData->name[len+1]='\0';
         }
-#ifdef U_ENABLE_ISO_2022_KR_CN
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
             (myLocale[2]=='_' || myLocale[2]=='\0'))
         {
@@ -580,13 +580,13 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
 
             /* open the required converters and cache them */
             myConverterData->myConverterArray[GB2312_1] =
-                ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);
+                ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);
             if(version==1) {
                 myConverterData->myConverterArray[ISO_IR_165] =
-                    ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode);
+                    ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);
             }
             myConverterData->myConverterArray[CNS_11643] =
-                ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode);
+                ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);
 
 
             /* set the function pointers to appropriate funtions */
@@ -604,7 +604,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
             }
         }
-#endif // U_ENABLE_ISO_2022_KR_CN
+#endif // !UCONFIG_NO_NON_HTML5_CONVERSION
         else{
 #ifdef U_ENABLE_GENERIC_ISO_2022
             myConverterData->isFirstBuffer = TRUE;
@@ -739,6 +739,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 };
 
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
 /*************** to unicode *******************/
 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
 /*      0                1               2               3               4               5               6               7               8               9    */
@@ -751,6 +752,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 };
+#endif
 
 
 static UCNV_TableStates_2022
@@ -903,6 +905,7 @@ DONE:
                     }
                     break;
                 /* case SS3_STATE: not used in ISO-2022-JP-x */
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
                 case ISO8859_1:
                 case ISO8859_7:
                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
@@ -912,6 +915,7 @@ DONE:
                         myData2022->toU2022State.cs[2]=(int8_t)tempState;
                     }
                     break;
+#endif
                 default:
                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
@@ -923,6 +927,7 @@ DONE:
                 }
             }
             break;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
         case ISO_2022_CN:
             {
                 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
@@ -984,6 +989,7 @@ DONE:
                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
             }
             break;
+#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
 
         default:
             *err = U_ILLEGAL_ESCAPE_SEQUENCE;
@@ -1404,12 +1410,16 @@ toUnicodeCallback(UConverter *cnv,
 static const StateEnum jpCharsetPref[]={
     ASCII,
     JISX201,
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
     ISO8859_1,
     ISO8859_7,
+#endif
     JISX208,
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
     JISX212,
     GB2312,
     KSC5601,
+#endif
     HWKANA_7BIT
 };
 
@@ -1779,6 +1789,7 @@ getTrail:
                         g = 0;
                     }
                     break;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
                 case ISO8859_1:
                     if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
                         targetValue = (uint32_t)sourceChar - 0x80;
@@ -1787,6 +1798,7 @@ getTrail:
                         g = 2;
                     }
                     break;
+#endif
                 case HWKANA_7BIT:
                     if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
                         if(converterData->version==3) {
@@ -1848,6 +1860,7 @@ getTrail:
                         useFallback = FALSE;
                     }
                     break;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
                 case ISO8859_7:
                     /* G0 SBCS forced to 7-bit output */
                     len2 = MBCS_SINGLE_FROM_UCHAR32(
@@ -1862,6 +1875,7 @@ getTrail:
                         useFallback = FALSE;
                     }
                     break;
+#endif
                 default:
                     /* G0 DBCS */
                     len2 = MBCS_FROM_UCHAR32_ISO2022(
@@ -1869,6 +1883,7 @@ getTrail:
                                 sourceChar, &value,
                                 useFallback, MBCS_OUTPUT_2);
                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
                         if(cs0 == KSC5601) {
                             /*
                              * Check for valid bytes for the encoding scheme.
@@ -1880,6 +1895,7 @@ getTrail:
                                 break;
                             }
                         }
+#endif
                         targetValue = value;
                         len = len2;
                         cs = cs0;
@@ -2173,6 +2189,7 @@ escape:
                         targetUniChar = mySourceChar;
                     }
                     break;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
                 case ISO8859_1:
                     if(mySourceChar <= 0x7f) {
                         targetUniChar = mySourceChar + 0x80;
@@ -2191,6 +2208,7 @@ escape:
                     /* return from a single-shift state to the previous one */
                     pToU2022State->g=pToU2022State->prevG;
                     break;
+#endif
                 case JISX201:
                     if(mySourceChar <= 0x7f) {
                         targetUniChar = jisx201ToU(mySourceChar);
@@ -2230,9 +2248,11 @@ getTrailByte:
                             } else {
                                 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
                                 mySourceChar = tmpSourceChar;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
                                 if (cs == KSC5601) {
                                     tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
                                 }
+#endif
                                 tempBuf[0] = (char)(tmpSourceChar >> 8);
                                 tempBuf[1] = (char)(tmpSourceChar);
                             }
@@ -2294,6 +2314,7 @@ endloop:
 }
 
 
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
 /***************************************************************
 *   Rules for ISO-2022-KR encoding
 *   i) The KSC5601 designator sequence should appear only once in a file,
@@ -3437,6 +3458,7 @@ endloop:
     args->target = myTarget;
     args->source = mySource;
 }
+#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
 
 static void
 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
@@ -3638,6 +3660,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
         /* include JIS X 0201 which is hardcoded */
         sa->add(sa->set, 0xa5);
         sa->add(sa->set, 0x203e);
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
             /* include Latin-1 for some variants of JP */
             sa->addRange(sa->set, 0, 0xff);
@@ -3645,6 +3668,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
             /* include ASCII for JP */
             sa->addRange(sa->set, 0, 0x7f);
         }
+#else
+        /* include ASCII for JP */
+        sa->addRange(sa->set, 0, 0x7f);
+#endif
         if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
             /*
              * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
@@ -3663,6 +3690,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
         }
         break;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
     case 'c':
     case 'z':
         /* include ASCII for CN */
@@ -3674,6 +3702,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
                 cnvData->currentConverter, sa, which, pErrorCode);
         /* the loop over myConverterArray[] will simply not find another converter */
         break;
+#endif
     default:
         break;
     }
@@ -3694,9 +3723,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
     for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
         UConverterSetFilter filter;
         if(cnvData->myConverterArray[i]!=NULL) {
-            if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
-                cnvData->version==0 && i==CNS_11643
-            ) {
+            if(cnvData->locale[0]=='j' && i==JISX208) {
+                /*
+                 * Only add code points that map to Shift-JIS codes
+                 * corresponding to JIS X 0208.
+                 */
+                filter=UCNV_SET_FILTER_SJIS;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
+            } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
+                       cnvData->version==0 && i==CNS_11643) {
                 /*
                  * Version-specific for CN:
                  * CN version 0 does not map CNS planes 3..7 although
@@ -3705,18 +3740,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
                  * The two versions create different Unicode sets.
                  */
                 filter=UCNV_SET_FILTER_2022_CN;
-            } else if(cnvData->locale[0]=='j' && i==JISX208) {
-                /*
-                 * Only add code points that map to Shift-JIS codes
-                 * corresponding to JIS X 0208.
-                 */
-                filter=UCNV_SET_FILTER_SJIS;
             } else if(i==KSC5601) {
                 /*
                  * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
                  * are broader than GR94.
                  */
                 filter=UCNV_SET_FILTER_GR94DBCS;
+#endif
             } else {
                 filter=UCNV_SET_FILTER_NONE;
             }
@@ -3854,6 +3884,7 @@ const UConverterSharedData _ISO2022JPData={
 
 }  // namespace
 
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
 /************* KR ***************/
 static const UConverterImpl _ISO2022KRImpl={
     UCNV_ISO_2022,
@@ -3970,5 +4001,6 @@ const UConverterSharedData _ISO2022CNData={
 };
 
 }  // namespace
+#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
 
 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */