Copy updated lib9/utf files from the upstream repository.

author Fredrik Roubert <roubert@google.com>

Thu, 28 Aug 2014 14:50:22 +0000 (14:50 +0000)

committer Youngjae Shin <yj99.shin@samsung.com>

Tue, 9 Jun 2015 11:43:19 +0000 (20:43 +0900)
author Fredrik Roubert <roubert@google.com>
Thu, 28 Aug 2014 14:50:22 +0000 (14:50 +0000)
committer Youngjae Shin <yj99.shin@samsung.com>
Tue, 9 Jun 2015 11:43:19 +0000 (20:43 +0900)
diff --git a/cpp/src/phonenumbers/utf/rune.c b/cpp/src/phonenumbers/utf/rune.c

index c268489..b4aa93b 100644 (file)
--- a/cpp/src/phonenumbers/utf/rune.c
+++ b/cpp/src/phonenumbers/utf/rune.c
@@ -1,6 +1,7 @@
  /*
   * The authors of this software are Rob Pike and Ken Thompson.
   *              Copyright (c) 2002 by Lucent Technologies.
+ *              Portions Copyright (c) 2009 The Go Authors.  All rights reserved.
   * Permission to use, copy, modify, and distribute this software for any
   * purpose without fee is hereby granted, provided that this entire notice
   * is included in all copies of any software which is or includes a copy
@@ -11,8 +12,6 @@
   * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
   * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
   */
-#include <stdarg.h>
-#include <string.h>
  #include "phonenumbers/utf/utf.h"
  #include "phonenumbers/utf/utfdef.h"
  
@@ -35,12 +34,14 @@ enum
         Rune1   = (1<<(Bit1+0*Bitx))-1,         /* 0000 0000 0111 1111 */
         Rune2   = (1<<(Bit2+1*Bitx))-1,         /* 0000 0111 1111 1111 */
         Rune3   = (1<<(Bit3+2*Bitx))-1,         /* 1111 1111 1111 1111 */
-       Rune4   = (1<<(Bit4+3*Bitx))-1,
-                                        /* 0001 1111 1111 1111 1111 1111 */
+       Rune4   = (1<<(Bit4+3*Bitx))-1,         /* 0001 1111 1111 1111 1111 1111 */
  
         Maskx   = (1<<Bitx)-1,                  /* 0011 1111 */
         Testx   = Maskx ^ 0xFF,                 /* 1100 0000 */
  
+       SurrogateMin    = 0xD800,
+       SurrogateMax    = 0xDFFF,
+
         Bad     = Runeerror,
  };
  
@@ -79,7 +80,7 @@ charntorune(Rune *rune, const char *str, int length)
          */
         c = *(uchar*)str;
         if(c < Tx) {
-               *rune = c;
+               *rune = (Rune)c;
                 return 1;
         }
  
@@ -101,7 +102,7 @@ charntorune(Rune *rune, const char *str, int length)
                 l = ((c << Bitx) | c1) & Rune2;
                 if(l <= Rune1)
                         goto bad;
-               *rune = l;
+               *rune = (Rune)l;
                 return 2;
         }
  
@@ -121,7 +122,9 @@ charntorune(Rune *rune, const char *str, int length)
                 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
                 if(l <= Rune2)
                         goto bad;
-               *rune = l;
+               if (SurrogateMin <= l && l <= SurrogateMax)
+                       goto bad;
+               *rune = (Rune)l;
                 return 3;
         }
  
@@ -137,9 +140,9 @@ charntorune(Rune *rune, const char *str, int length)
                 goto bad;
         if (c < T5) {
                 l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
-               if (l <= Rune3)
+               if (l <= Rune3 || l > Runemax)
                         goto bad;
-               *rune = l;
+               *rune = (Rune)l;
                 return 4;
         }
  
@@ -175,7 +178,7 @@ chartorune(Rune *rune, const char *str)
          */
         c = *(uchar*)str;
         if(c < Tx) {
-               *rune = c;
+               *rune = (Rune)c;
                 return 1;
         }
  
@@ -192,7 +195,7 @@ chartorune(Rune *rune, const char *str)
                 l = ((c << Bitx) | c1) & Rune2;
                 if(l <= Rune1)
                         goto bad;
-               *rune = l;
+               *rune = (Rune)l;
                 return 2;
         }
  
@@ -207,7 +210,9 @@ chartorune(Rune *rune, const char *str)
                 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
                 if(l <= Rune2)
                         goto bad;
-               *rune = l;
+               if (SurrogateMin <= l && l <= SurrogateMax)
+                       goto bad;
+               *rune = (Rune)l;
                 return 3;
         }
  
@@ -220,9 +225,9 @@ chartorune(Rune *rune, const char *str)
                 goto bad;
         if (c < T5) {
                 l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
-               if (l <= Rune3)
+               if (l <= Rune3 || l > Runemax)
                         goto bad;
-               *rune = l;
+               *rune = (Rune)l;
                 return 4;
         }
  
@@ -240,7 +245,8 @@ bad:
  }
  
  int
-isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) {
+isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed)
+{
         *consumed = charntorune(rune, str, length);
         return *rune != Runeerror || *consumed == 3;
  }
@@ -257,7 +263,7 @@ runetochar(char *str, const Rune *rune)
          */
         c = *rune;
         if(c <= Rune1) {
-               str[0] = c;
+               str[0] = (char)c;
                 return 1;
         }
  
@@ -266,28 +272,30 @@ runetochar(char *str, const Rune *rune)
          *      0080-07FF => T2 Tx
          */
         if(c <= Rune2) {
-               str[0] = T2 | (c >> 1*Bitx);
-               str[1] = Tx | (c & Maskx);
+               str[0] = (char)(T2 | (c >> 1*Bitx));
+               str[1] = (char)(Tx | (c & Maskx));
                 return 2;
         }
  
         /*
-        * If the Rune is out of range, convert it to the error rune.
+        * If the Rune is out of range or a surrogate half, convert it to the error rune.
          * Do this test here because the error rune encodes to three bytes.
          * Doing it earlier would duplicate work, since an out of range
          * Rune wouldn't have fit in one or two bytes.
          */
         if (c > Runemax)
                 c = Runeerror;
+       if (SurrogateMin <= c && c <= SurrogateMax)
+               c = Runeerror;
  
         /*
          * three character sequence
          *      0800-FFFF => T3 Tx Tx
          */
         if (c <= Rune3) {
-               str[0] = T3 |  (c >> 2*Bitx);
-               str[1] = Tx | ((c >> 1*Bitx) & Maskx);
-               str[2] = Tx |  (c & Maskx);
+               str[0] = (char)(T3 |  (c >> 2*Bitx));
+               str[1] = (char)(Tx | ((c >> 1*Bitx) & Maskx));
+               str[2] = (char)(Tx |  (c & Maskx));
                 return 3;
         }
  
@@ -295,10 +303,10 @@ runetochar(char *str, const Rune *rune)
          * four character sequence (21-bit value)
          *     10000-1FFFFF => T4 Tx Tx Tx
          */
-       str[0] = T4 | (c >> 3*Bitx);
-       str[1] = Tx | ((c >> 2*Bitx) & Maskx);
-       str[2] = Tx | ((c >> 1*Bitx) & Maskx);
-       str[3] = Tx | (c & Maskx);
+       str[0] = (char)(T4 | (c >> 3*Bitx));
+       str[1] = (char)(Tx | ((c >> 2*Bitx) & Maskx));
+       str[2] = (char)(Tx | ((c >> 1*Bitx) & Maskx));
+       str[3] = (char)(Tx | (c & Maskx));
         return 4;
  }
  
@@ -317,7 +325,7 @@ runenlen(const Rune *r, int nrune)
  
         nb = 0;
         while(nrune--) {
-               c = *r++;
+               c = (int)*r++;
                 if (c <= Rune1)
                         nb++;
                 else if (c <= Rune2)
diff --git a/cpp/src/phonenumbers/utf/unicodetext.cc b/cpp/src/phonenumbers/utf/unicodetext.cc

index 55ffedf..ecd3230 100644 (file)
--- a/cpp/src/phonenumbers/utf/unicodetext.cc
+++ b/cpp/src/phonenumbers/utf/unicodetext.cc
@@ -85,7 +85,7 @@ static int ConvertToInterchangeValid(char* start, int len) {
        }
      }
      // Is the current string invalid UTF8 or just non-interchange UTF8?
-    char32 rune;
+    Rune rune;
      int n;
      if (isvalidcharntorune(start, end - start, &rune, &n)) {
        // structurally valid UTF8, but not interchange valid
@@ -362,7 +362,8 @@ UnicodeText::~UnicodeText() {}
  void UnicodeText::push_back(char32 c) {
    if (UniLib::IsValidCodepoint(c)) {
      char buf[UTFmax];
-    int len = runetochar(buf, &c);
+    Rune rune = c;
+    int len = runetochar(buf, &rune);
      if (UniLib::IsInterchangeValid(buf, len)) {
        repr_.append(buf, len);
      } else {
diff --git a/cpp/src/phonenumbers/utf/unilib.cc b/cpp/src/phonenumbers/utf/unilib.cc

index ffcb8b0..918134e 100644 (file)
--- a/cpp/src/phonenumbers/utf/unilib.cc
+++ b/cpp/src/phonenumbers/utf/unilib.cc
@@ -46,7 +46,7 @@ inline bool IsInterchangeValidCodepoint(char32 c) {
  }  // namespace
  
  int SpanInterchangeValid(const char* begin, int byte_length) {
-  char32 rune;
+  Rune rune;
    const char* p = begin;
    const char* end = begin + byte_length;
    while (p < end) {
diff --git a/cpp/src/phonenumbers/utf/utf.h b/cpp/src/phonenumbers/utf/utf.h

index f4fd482..72d01ed 100644 (file)
--- a/cpp/src/phonenumbers/utf/utf.h
+++ b/cpp/src/phonenumbers/utf/utf.h
@@ -1,27 +1,22 @@
  /*
   * The authors of this software are Rob Pike and Ken Thompson.
- * Copyright (c) 1998-2002 by Lucent Technologies.
- * Portions Copyright (c) 2009 The Go Authors. All rights reserved.
+ *              Copyright (c) 1998-2002 by Lucent Technologies.
+ *              Portions Copyright (c) 2009 The Go Authors.  All rights reserved.
   * Permission to use, copy, modify, and distribute this software for any
   * purpose without fee is hereby granted, provided that this entire notice
   * is included in all copies of any software which is or includes a copy
   * or modification of this software and in all copies of the supporting
   * documentation for such software.
   * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
   * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
   * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
- */ 
+ */
  
  #ifndef _UTFH_
  #define _UTFH_ 1
  
-// stdint.h content doesn't seem to be used in this file and doesn't exist on
-// Windows, therefore we comment it out here so that the code could be compiled
-// on Windows.
-//#include <stdint.h>
-
-typedef signed int Rune;       /* Code-point values in Unicode 4.0 are 21 bits wide.*/
+typedef unsigned int Rune;     /* Code-point values in Unicode 4.0 are 21 bits wide.*/
  
  enum
  {
@@ -71,7 +66,7 @@ int chartorune(Rune* r, const char* s);
  // n bytes of s.  If the UTF sequence is incomplete within n bytes,
  // charntorune will set *r to Runeerror and return 0. If it is complete
  // but not in UTF format, it will set *r to Runeerror and return 1.
-// 
+//
  // Added 2004-09-24 by Wei-Hwa Huang
  
  int charntorune(Rune* r, const char* s, int n);
@@ -126,7 +121,7 @@ int utfnlen(const char* s, long n);
  // byte terminating a string is considered to be part of the string s.
  // (cf. strchr)
  
-const char* utfrune(const char* s, Rune r);
+/*const*/ char* utfrune(const char* s, Rune r);
  
  
  // utfrrune returns a pointer to the last occurrence of rune r in the
@@ -134,7 +129,7 @@ const char* utfrune(const char* s, Rune r);
  // byte terminating a string is considered to be part of the string s.
  // (cf. strrchr)
  
-const char* utfrrune(const char* s, Rune r);
+/*const*/ char* utfrrune(const char* s, Rune r);
  
  
  // utfutf returns a pointer to the first occurrence of the UTF string
@@ -155,7 +150,7 @@ char* utfecpy(char *s1, char *es1, const char *s2);
  
  // These functions are rune-string analogues of the corresponding
  // functions in strcat (3).
-// 
+//
  // These routines first appeared in Plan 9.
  // SEE ALSO
  // memmove (3)
@@ -208,8 +203,8 @@ Rune totitlerune(Rune r);
  
  // isupperrune tests for upper case characters, including Unicode
  // upper case letters and targets of the toupper mapping. islowerrune
-// and istitlerune are defined analogously. 
- 
+// and istitlerune are defined analogously.
+
  int isupperrune(Rune r);
  int islowerrune(Rune r);
  int istitlerune(Rune r);
@@ -227,12 +222,6 @@ int isalpharune(Rune r);
  int isdigitrune(Rune r);
  
  
-// isideographicrune tests for ideographic characters and numbers, as
-// defined by the Unicode standard.
-
-int isideographicrune(Rune r);
-
-
  // isspacerune tests for whitespace characters, including "C" locale
  // whitespace, Unicode defined whitespace, and the "zero-width
  // non-break space" character.
diff --git a/cpp/src/phonenumbers/utf/utfdef.h b/cpp/src/phonenumbers/utf/utfdef.h

index adc6d95..4bbdfc6 100644 (file)
--- a/cpp/src/phonenumbers/utf/utfdef.h
+++ b/cpp/src/phonenumbers/utf/utfdef.h
@@ -25,4 +25,3 @@ typedef unsigned int          uint;
  typedef unsigned long          ulong;
  
  #define nelem(x) (sizeof(x)/sizeof((x)[0]))
-#define nil ((void*)0)
author	Fredrik Roubert <roubert@google.com>
	Thu, 28 Aug 2014 14:50:22 +0000 (14:50 +0000)
committer	Youngjae Shin <yj99.shin@samsung.com>
	Tue, 9 Jun 2015 11:43:19 +0000 (20:43 +0900)
cpp/src/phonenumbers/utf/rune.c		patch \| blob \| history
cpp/src/phonenumbers/utf/unicodetext.cc		patch \| blob \| history
cpp/src/phonenumbers/utf/unilib.cc		patch \| blob \| history
cpp/src/phonenumbers/utf/utf.h		patch \| blob \| history
cpp/src/phonenumbers/utf/utfdef.h		patch \| blob \| history