#include "glib.h"
#include "gunichartables.h"
#include "gmirroringtable.h"
+#include "gscripttable.h"
#include "gunicodeprivate.h"
#include "galias.h"
-#define ISDIGIT(Type) IS ((Type), \
- OR (G_UNICODE_DECIMAL_NUMBER, \
- OR (G_UNICODE_LETTER_NUMBER, \
- OR (G_UNICODE_OTHER_NUMBER, 0))))
-
#define ISALPHA(Type) IS ((Type), \
OR (G_UNICODE_LOWERCASE_LETTER, \
OR (G_UNICODE_UPPERCASE_LETTER, \
OR (G_UNICODE_COMBINING_MARK, \
OR (G_UNICODE_ENCLOSING_MARK, 0))))
+/* ISZEROWIDTHTYPE(Type): tests Type against the three categories named
+ * below -- non-spacing mark, enclosing mark and format -- by way of the
+ * IS() and OR() helper macros.  Those helpers are defined outside this
+ * excerpt (presumably a bit-mask membership test; TODO confirm).
+ * Used by g_unichar_iszerowidth(). */
+#define ISZEROWIDTHTYPE(Type) IS ((Type), \
+ OR (G_UNICODE_NON_SPACING_MARK, \
+ OR (G_UNICODE_ENCLOSING_MARK, \
+ OR (G_UNICODE_FORMAT, 0))))
+
/**
* g_unichar_isalnum:
* @c: a Unicode character
unsigned int i;
for (i = 0; i < G_N_ELEMENTS (title_table); ++i)
if (title_table[i][0] == c)
- return 1;
- return 0;
+ return TRUE;
+ return FALSE;
}
/**
{
return ((c >= 'a' && c <= 'f')
|| (c >= 'A' && c <= 'F')
- || ISDIGIT (TYPE (c)));
+ || (TYPE (c) == G_UNICODE_DECIMAL_NUMBER));
}
/**
}
/**
+ * g_unichar_iszerowidth:
+ * @c: a Unicode character
+ *
+ * Reports whether @c normally occupies no horizontal space when drawn.
+ * %TRUE is returned for non-spacing marks and enclosing marks (such as
+ * combining accents), for format characters, for U+200B ZERO WIDTH SPACE
+ * and for the code points U+1160 through U+11FF (the Hangul Jamo medial
+ * vowels and final consonants).  U+00AD SOFT HYPHEN is explicitly
+ * excluded and yields %FALSE.
+ *
+ * This is usually combined with g_unichar_iswide() or
+ * g_unichar_iswide_cjk() to count the cells a string takes up on a grid
+ * display such as a terminal.  Be aware that not every terminal renders
+ * zero-width marks with zero width.
+ *
+ * Return value: %TRUE if the character has zero width
+ *
+ * Since: 2.14
+ **/
+gboolean
+g_unichar_iszerowidth (gunichar c)
+{
+  gboolean zero_width = FALSE;
+
+  if (G_UNLIKELY (c == 0x00AD))
+    {
+      /* SOFT HYPHEN: decided first, so the type test below never sees it */
+      zero_width = FALSE;
+    }
+  else if (G_UNLIKELY (ISZEROWIDTHTYPE (TYPE (c))))
+    {
+      /* non-spacing mark, enclosing mark or format character */
+      zero_width = TRUE;
+    }
+  else if (G_UNLIKELY (c == 0x200B ||
+                       (c >= 0x1160 && c <= 0x11FF)))
+    {
+      /* ZERO WIDTH SPACE, or the U+1160..U+11FF part of Hangul Jamo */
+      zero_width = TRUE;
+    }
+
+  return zero_width;
+}
+
+/**
* g_unichar_iswide:
* @c: a Unicode character
*
gunichar start, end;
};
-int
+static int
interval_compare (const void *key, const void *elt)
{
gunichar c = GPOINTER_TO_UINT (key);
- struct Interval *interval = elt;
+ struct Interval *interval = (struct Interval *)elt;
if (c < interval->start)
return -1;
return g_utf8_get_char (p);
}
else
- return val ? val : c;
+ {
+ /* Some lowercase letters, e.g., U+00AA, FEMININE ORDINAL INDICATOR,
+ * do not have an uppercase equivalent, in which case val will be
+ * zero. */
+ return val ? val : c;
+ }
}
else if (t == G_UNICODE_TITLECASE_LETTER)
{
return g_utf8_get_char (p);
}
else
- return val ? val : c;
+ {
+ /* Not all uppercase letters are guaranteed to have a lowercase
+ * equivalent. If this is the case, val will be zero. */
+ return val ? val : c;
+ }
}
else if (t == G_UNICODE_TITLECASE_LETTER)
{
for (i = 0; i < G_N_ELEMENTS (title_table); ++i)
{
if (title_table[i][0] == c)
- val = title_table[i][1];
+ {
+ val = title_table[i][1];
+ break;
+ }
}
}
- len += g_unichar_to_utf8 (val, out_buffer ? out_buffer + len : NULL);
+ /* Some lowercase letters, e.g., U+00AA, FEMININE ORDINAL INDICATOR,
+ * do not have an uppercase equivalent, in which case val will be
+ * zero. */
+ len += g_unichar_to_utf8 (val ? val : c, out_buffer ? out_buffer + len : NULL);
}
}
else
for (i = 0; i < G_N_ELEMENTS (title_table); ++i)
{
if (title_table[i][0] == c)
- val = title_table[i][2];
+ {
+ val = title_table[i][2];
+ break;
+ }
}
}
- len += g_unichar_to_utf8 (val, out_buffer ? out_buffer + len : NULL);
+ /* Not all uppercase letters are guaranteed to have a lowercase
+ * equivalent. If this is the case, val will be zero. */
+ len += g_unichar_to_utf8 (val ? val : c, out_buffer ? out_buffer + len : NULL);
}
}
else
}
+#define G_SCRIPT_TABLE_MIDPOINT (G_N_ELEMENTS (g_script_table) / 2)
+
+/* Binary search of g_script_table for the entry whose range
+ * [start, start + chars) contains ch.  Returns that entry's script, or
+ * G_UNICODE_SCRIPT_UNKNOWN when no entry matches.  The search relies on
+ * the table being ordered by start -- the table is defined outside this
+ * excerpt, so that ordering is assumed here (TODO confirm). */
+static inline GUnicodeScript
+g_unichar_get_script_bsearch (gunichar ch)
+{
+  /* Index of the last successful lookup, kept across calls and used as
+   * the first probe of the next search.  It starts at the middle of the
+   * table.  It is a plain static int, read and written here without any
+   * synchronization. */
+  static int last_hit = G_SCRIPT_TABLE_MIDPOINT;
+  int lo = 0;
+  int hi = G_N_ELEMENTS (g_script_table) - 1;
+  int probe = last_hit;
+
+  for (;;)
+    {
+      if (ch < g_script_table[probe].start)
+        hi = probe - 1;
+      else if (ch >= g_script_table[probe].start + g_script_table[probe].chars)
+        lo = probe + 1;
+      else
+        {
+          /* ch lies inside this entry: remember it for the next call */
+          last_hit = probe;
+          return g_script_table[probe].script;
+        }
+
+      /* search window is empty: ch is not covered by any entry */
+      if (lo > hi)
+        break;
+
+      probe = (lo + hi) / 2;
+    }
+
+  return G_UNICODE_SCRIPT_UNKNOWN;
+}
+
+/**
+ * g_unichar_get_script:
+ * @ch: a Unicode character
+ *
+ * Returns the #GUnicodeScript that @ch belongs to, as defined by
+ * Unicode Standard Annex #24.  Code points below %G_EASY_SCRIPTS_RANGE
+ * are answered by a direct table lookup; all others go through a binary
+ * search of the script range table.  @ch is not checked for being a
+ * valid Unicode character; if you pass in an invalid character, the
+ * result is undefined.
+ *
+ * Return value: the #GUnicodeScript for the character.
+ *
+ * Since: 2.14
+ */
+GUnicodeScript
+g_unichar_get_script (gunichar ch)
+{
+  /* outside the directly indexed range: fall back to the range search */
+  if (ch >= G_EASY_SCRIPTS_RANGE)
+    return g_unichar_get_script_bsearch (ch);
+
+  return g_script_easy_table[ch];
+}
+
+
#define __G_UNIPROP_C__
#include "galiasdef.c"