New files implementing GSequence, a list implemented using a binary tree.

[platform/upstream/glib.git] / glib / guniprop.c
diff --git a/glib/guniprop.c b/glib/guniprop.c

index 6a9776d..908d3fe 100644 (file)
--- a/glib/guniprop.c
+++ b/glib/guniprop.c
@@ -29,6 +29,7 @@
  #include "glib.h"
  #include "gunichartables.h"
  #include "gmirroringtable.h"
+#include "gscripttable.h"
  #include "gunicodeprivate.h"
  #include "galias.h"
  
@@ -62,11 +63,6 @@
  
  
  
-#define ISDIGIT(Type)  IS ((Type),                             \
-                           OR (G_UNICODE_DECIMAL_NUMBER,       \
-                           OR (G_UNICODE_LETTER_NUMBER,        \
-                           OR (G_UNICODE_OTHER_NUMBER,         0))))
-
  #define ISALPHA(Type)  IS ((Type),                             \
                             OR (G_UNICODE_LOWERCASE_LETTER,     \
                             OR (G_UNICODE_UPPERCASE_LETTER,     \
@@ -89,6 +85,11 @@
                             OR (G_UNICODE_COMBINING_MARK,       \
                             OR (G_UNICODE_ENCLOSING_MARK,       0))))
  
+#define ISZEROWIDTHTYPE(Type)  IS ((Type),                     \
+                           OR (G_UNICODE_NON_SPACING_MARK,     \
+                           OR (G_UNICODE_ENCLOSING_MARK,       \
+                           OR (G_UNICODE_FORMAT,               0)))) /* NOTE(review): presumably true when Type is any of the three listed types; IS/OR are defined outside this hunk */
+
  /**
   * g_unichar_isalnum:
   * @c: a Unicode character
@@ -320,8 +321,8 @@ g_unichar_istitle (gunichar c)
    unsigned int i;
    for (i = 0; i < G_N_ELEMENTS (title_table); ++i)
      if (title_table[i][0] == c)
-      return 1;
-  return 0;
+      return TRUE;
+  return FALSE;
  }
  
  /**
@@ -337,7 +338,7 @@ g_unichar_isxdigit (gunichar c)
  {
    return ((c >= 'a' && c <= 'f')
           || (c >= 'A' && c <= 'F')
-         || ISDIGIT (TYPE (c)));
+         || (TYPE (c) == G_UNICODE_DECIMAL_NUMBER));
  }
  
  /**
@@ -356,6 +357,40 @@ g_unichar_isdefined (gunichar c)
  }
  
  /**
+ * g_unichar_iszerowidth:
+ * @c: a Unicode character
+ * 
+ * Determines if a given character typically takes zero width when rendered.
+ * The return value is %TRUE for all non-spacing and enclosing marks
+ * (e.g., combining accents), format characters, and the zero-width
+ * space, but not for U+00AD SOFT HYPHEN.
+ *
+ * A typical use of this function is with one of g_unichar_iswide() or
+ * g_unichar_iswide_cjk() to determine the number of cells a string occupies
+ * when displayed on a grid display (terminals).  However, note that not all
+ * terminals support zero-width rendering of zero-width marks.
+ *
+ * Return value: %TRUE if the character has zero width
+ *
+ * Since: 2.14
+ **/
+gboolean
+g_unichar_iszerowidth (gunichar c)
+{
+  if (G_UNLIKELY (c == 0x00AD)) /* SOFT HYPHEN is rejected first, before the type test below can accept it */
+    return FALSE;
+
+  if (G_UNLIKELY (ISZEROWIDTHTYPE (TYPE (c)))) /* type is non-spacing mark, enclosing mark or format */
+    return TRUE;
+
+  if (G_UNLIKELY ((c >= 0x1160 && c < 0x1200) || /* 0x1160..0x11FF; NOTE(review): appears to be Hangul Jamo medial/final letters - confirm */
+                 c == 0x200B)) /* the zero-width space named in the doc comment */
+    return TRUE;
+
+  return FALSE;
+}
+
+/**
   * g_unichar_iswide:
   * @c: a Unicode character
   * 
@@ -390,11 +425,11 @@ struct Interval
    gunichar start, end;
  };
  
-int
+static int
  interval_compare (const void *key, const void *elt)
  {
    gunichar c = GPOINTER_TO_UINT (key);
-  struct Interval *interval = elt;
+  struct Interval *interval = (struct Interval *)elt;
  
    if (c < interval->start)
      return -1;
@@ -514,7 +549,12 @@ g_unichar_toupper (gunichar c)
           return g_utf8_get_char (p);
         }
        else
-       return val ? val : c;
+        {
+         /* Some lowercase letters, e.g., U+00AA FEMININE ORDINAL INDICATOR,
+          * do not have an uppercase equivalent, in which case val will be
+          * zero. */
+         return val ? val : c;
+       }
      }
    else if (t == G_UNICODE_TITLECASE_LETTER)
      {
@@ -551,7 +591,11 @@ g_unichar_tolower (gunichar c)
           return g_utf8_get_char (p);
         }
        else
-       return val ? val : c;
+       {
+         /* Not all uppercase letters are guaranteed to have a lowercase
+          * equivalent.  If this is the case, val will be zero. */
+         return val ? val : c;
+       }
      }
    else if (t == G_UNICODE_TITLECASE_LETTER)
      {
@@ -823,11 +867,17 @@ real_toupper (const gchar *str,
                   for (i = 0; i < G_N_ELEMENTS (title_table); ++i)
                     {
                       if (title_table[i][0] == c)
-                       val = title_table[i][1];
+                       {
+                         val = title_table[i][1];
+                         break;
+                       }
                     }
                 }
  
-             len += g_unichar_to_utf8 (val, out_buffer ? out_buffer + len : NULL);
+             /* Some lowercase letters, e.g., U+00AA FEMININE ORDINAL INDICATOR,
+              * do not have an uppercase equivalent, in which case val will be
+              * zero. */
+             len += g_unichar_to_utf8 (val ? val : c, out_buffer ? out_buffer + len : NULL);
             }
         }
        else
@@ -1007,11 +1057,16 @@ real_tolower (const gchar *str,
                   for (i = 0; i < G_N_ELEMENTS (title_table); ++i)
                     {
                       if (title_table[i][0] == c)
-                       val = title_table[i][2];
+                       {
+                         val = title_table[i][2];
+                         break;
+                       }
                     }
                 }
  
-             len += g_unichar_to_utf8 (val, out_buffer ? out_buffer + len : NULL);
+             /* Not all uppercase letters are guaranteed to have a lowercase
+              * equivalent.  If this is the case, val will be zero. */
+             len += g_unichar_to_utf8 (val ? val : c, out_buffer ? out_buffer + len : NULL);
             }
         }
        else
@@ -1168,5 +1223,55 @@ g_unichar_get_mirror_char (gunichar ch,
  
  }
  
+#define G_SCRIPT_TABLE_MIDPOINT (G_N_ELEMENTS (g_script_table) / 2) /* middle index of the table; initial value of the cached probe position */
+/* Binary search of g_script_table for the entry whose range contains ch; returns G_UNICODE_SCRIPT_UNKNOWN when none does. */
+static inline GUnicodeScript
+g_unichar_get_script_bsearch (gunichar ch)
+{
+  int lower = 0;
+  int upper = G_N_ELEMENTS (g_script_table) - 1;
+  static int saved_mid = G_SCRIPT_TABLE_MIDPOINT; /* static: keeps the index of the last hit across calls. NOTE(review): read and written without synchronization */
+  int mid = saved_mid; /* first probe is where the previous lookup succeeded */
+
+  /* NOTE(review): the search assumes g_script_table is sorted by start - confirm in gscripttable.h */
+  do 
+    {
+      if (ch < g_script_table[mid].start) /* ch lies before entry mid: continue in the lower part */
+       upper = mid - 1;
+      else if (ch >= g_script_table[mid].start + g_script_table[mid].chars) /* ch lies past [start, start + chars): continue in the upper part */
+       lower = mid + 1;
+      else
+       return g_script_table[saved_mid = mid].script; /* hit: remember this index for the next call */
+
+      mid = (lower + upper) / 2;
+    }
+  while (lower <= upper);
+
+  return G_UNICODE_SCRIPT_UNKNOWN; /* no table entry covers ch */
+}
+
+/**
+ * g_unichar_get_script:
+ * @ch: a Unicode character
+ * 
+ * Looks up the #GUnicodeScript for a particular character (as defined 
+ * by Unicode Standard Annex #24). No check is made for @ch being a
+ * valid Unicode character; if you pass in an invalid character, the
+ * result is undefined.
+ * 
+ * Return value: the #GUnicodeScript for the character.
+ *
+ * Since: 2.14
+ */
+GUnicodeScript
+g_unichar_get_script (gunichar ch)
+{
+  if (ch < G_EASY_SCRIPTS_RANGE) /* below the threshold: index g_script_easy_table directly */
+    return g_script_easy_table[ch];
+  else 
+    return g_unichar_get_script_bsearch (ch); /* all other values: binary search of g_script_table */
+}
+
+
  #define __G_UNIPROP_C__
  #include "galiasdef.c"