Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / third_party / icu / source / common / ucnv_u8.c
index 75f554c..24205f5 100644 (file)
@@ -1,6 +1,6 @@
 /*  
 **********************************************************************
-*   Copyright (C) 2002-2007, International Business Machines
+*   Copyright (C) 2002-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ucnv_u8.c
@@ -23,6 +23,9 @@
 #if !UCONFIG_NO_CONVERSION
 
 #include "unicode/ucnv.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
 #include "ucnv_bld.h"
 #include "ucnv_cnv.h"
 #include "cmemory.h"
@@ -84,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = {
 static const uint32_t
 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
 
+static UBool hasCESU8Data(const UConverter *cnv)
+{
+#if UCONFIG_NO_NON_HTML5_CONVERSION
+    return FALSE;
+#else
+    return (UBool)(cnv->sharedData == &_CESU8Data);
+#endif
+}
+
 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
                                   UErrorCode * err)
 {
@@ -93,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
     const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
     const UChar *targetLimit = args->targetLimit;
     unsigned char *toUBytes = cnv->toUBytes;
-    UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
+    UBool isCESU8 = hasCESU8Data(cnv);
     uint32_t ch, ch2 = 0;
     int32_t i, inBytes;
-  
+
     /* Restore size of current sequence */
     if (cnv->toUnicodeStatus && myTarget < targetLimit)
     {
@@ -130,7 +142,7 @@ morebytes:
                 if (mySource < sourceLimit)
                 {
                     toUBytes[i] = (char) (ch2 = *mySource);
-                    if (!UTF8_IS_TRAIL(ch2))
+                    if (!U8_IS_TRAIL(ch2))
                     {
                         break; /* i < inBytes */
                     }
@@ -164,7 +176,7 @@ morebytes:
              * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
              */
             if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
-                (isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch)))
+                (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
             {
                 /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
                 if (ch <= MAXIMUM_UCS2) 
@@ -223,7 +235,7 @@ static void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
     const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
     const UChar *targetLimit = args->targetLimit;
     unsigned char *toUBytes = cnv->toUBytes;
-    UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
+    UBool isCESU8 = hasCESU8Data(cnv);
     uint32_t ch, ch2 = 0;
     int32_t i, inBytes;
 
@@ -259,7 +271,7 @@ morebytes:
                 if (mySource < sourceLimit)
                 {
                     toUBytes[i] = (char) (ch2 = *mySource);
-                    if (!UTF8_IS_TRAIL(ch2))
+                    if (!U8_IS_TRAIL(ch2))
                     {
                         break; /* i < inBytes */
                     }
@@ -292,7 +304,7 @@ morebytes:
              * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
              */
             if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
-                (isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch)))
+                (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
             {
                 /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
                 if (ch <= MAXIMUM_UCS2) 
@@ -354,7 +366,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
     UChar32 ch;
     uint8_t tempBuf[4];
     int32_t indexToWrite;
-    UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
+    UBool isNotCESU8 = !hasCESU8Data(cnv);
 
     if (cnv->fromUChar32 && myTarget < targetLimit)
     {
@@ -387,13 +399,13 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
         }
         else {
             /* Check for surrogates */
-            if(UTF_IS_SURROGATE(ch) && isNotCESU8) {
+            if(U16_IS_SURROGATE(ch) && isNotCESU8) {
 lowsurrogate:
                 if (mySource < sourceLimit) {
                     /* test both code units */
-                    if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_SECOND_SURROGATE(*mySource)) {
+                    if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
                         /* convert and consume this supplementary code point */
-                        ch=UTF16_GET_PAIR_VALUE(ch, *mySource);
+                        ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
                         ++mySource;
                         /* exit this condition tree */
                     }
@@ -470,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar
     int32_t offsetNum, nextSourceIndex;
     int32_t indexToWrite;
     uint8_t tempBuf[4];
-    UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
+    UBool isNotCESU8 = !hasCESU8Data(cnv);
 
     if (cnv->fromUChar32 && myTarget < targetLimit)
     {
@@ -513,13 +525,13 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar
         {
             nextSourceIndex = offsetNum + 1;
 
-            if(UTF_IS_SURROGATE(ch) && isNotCESU8) {
+            if(U16_IS_SURROGATE(ch) && isNotCESU8) {
 lowsurrogate:
                 if (mySource < sourceLimit) {
                     /* test both code units */
-                    if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_SECOND_SURROGATE(*mySource)) {
+                    if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
                         /* convert and consume this supplementary code point */
-                        ch=UTF16_GET_PAIR_VALUE(ch, *mySource);
+                        ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
                         ++mySource;
                         ++nextSourceIndex;
                         /* exit this condition tree */
@@ -662,42 +674,42 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
     case 6:
         ch += (myByte = *source);
         ch <<= 6;
-        if (!UTF8_IS_TRAIL(myByte))
+        if (!U8_IS_TRAIL(myByte))
         {
             isLegalSequence = 0;
             break;
         }
         ++source;
-    case 5:
+    case 5: /*fall through*/
         ch += (myByte = *source);
         ch <<= 6;
-        if (!UTF8_IS_TRAIL(myByte))
+        if (!U8_IS_TRAIL(myByte))
         {
             isLegalSequence = 0;
             break;
         }
         ++source;
-    case 4:
+    case 4: /*fall through*/
         ch += (myByte = *source);
         ch <<= 6;
-        if (!UTF8_IS_TRAIL(myByte))
+        if (!U8_IS_TRAIL(myByte))
         {
             isLegalSequence = 0;
             break;
         }
         ++source;
-    case 3:
+    case 3: /*fall through*/
         ch += (myByte = *source);
         ch <<= 6;
-        if (!UTF8_IS_TRAIL(myByte))
+        if (!U8_IS_TRAIL(myByte))
         {
             isLegalSequence = 0;
             break;
         }
         ++source;
-    case 2:
+    case 2: /*fall through*/
         ch += (myByte = *source);
-        if (!UTF8_IS_TRAIL(myByte))
+        if (!U8_IS_TRAIL(myByte))
         {
             isLegalSequence = 0;
             break;
@@ -749,7 +761,7 @@ static void
 ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                   UConverterToUnicodeArgs *pToUArgs,
                   UErrorCode *pErrorCode) {
-    UConverter *utf8, *cnv;
+    UConverter *utf8;
     const uint8_t *source, *sourceLimit;
     uint8_t *target;
     int32_t targetCapacity;
@@ -762,7 +774,6 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
 
     /* set up the local pointers */
     utf8=pToUArgs->converter;
-    cnv=pFromUArgs->converter;
     source=(uint8_t *)pToUArgs->source;
     sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
     target=(uint8_t *)pFromUArgs->target;
@@ -819,7 +830,7 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
             if(U8_IS_TRAIL(b)) {
                 ++i;
             } else {
-                if(i<utf8_countTrailBytes[b]) {
+                if(i<U8_COUNT_TRAIL_BYTES(b)) {
                     /* stop converting before the lead byte if there are not enough trail bytes for it */
                     count-=i+1;
                 }
@@ -885,7 +896,7 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
             /* handle "complicated" and error cases, and continuing partial characters */
             oldToULength=0;
             toULength=1;
-            toULimit=utf8_countTrailBytes[b]+1;
+            toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
             c=b;
 moreBytes:
             while(toULength<toULimit) {
@@ -958,7 +969,7 @@ moreBytes:
             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
         } else {
             b=*source;
-            toULimit=utf8_countTrailBytes[b]+1;
+            toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
             if(toULimit>(sourceLimit-source)) {
                 /* collect a truncated byte sequence */
                 toULength=0;