From f91ef4ef15d220f6899c97aaf5b1c0a8f68cfe9a Mon Sep 17 00:00:00 2001
From: Christian Persch
Date: Mon, 25 Feb 2013 14:48:14 +0100
Subject: [PATCH] unicode: Allow noncharacters

Implement unicode corrigendum #9.

https://bugzilla.gnome.org/show_bug.cgi?id=694669
---
 glib/gutf8.c         | 14 +++-----------
 glib/tests/unicode.c |  2 +-
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/glib/gutf8.c b/glib/gutf8.c
index e61d33d..9244fe8 100644
--- a/glib/gutf8.c
+++ b/glib/gutf8.c
@@ -104,22 +104,14 @@
  * a point above 0x0010ffff, since UTF-16 couldn't represent it.
  *
  * The second check covers surrogate pairs (category Cs).
- *
- * The last two checks cover "Noncharacter": defined as:
- * "A code point that is permanently reserved for
- * internal use, and that should never be interchanged. In
- * Unicode 3.1, these consist of the values U+nFFFE and U+nFFFF
- * (where n is from 0 to 10_16) and the values U+FDD0..U+FDEF."
  *
  * @param Char the character
  */
 #define UNICODE_VALID(Char) \
     ((Char) < 0x110000 && \
-     (((Char) & 0xFFFFF800) != 0xD800) && \
-     ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \
-     ((Char) & 0xFFFE) != 0xFFFE)
-
-
+     (((Char) & 0xFFFFF800) != 0xD800))
+
+
 static const gchar utf8_skip_data[256] = {
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index bee0def..320e6e0 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -33,7 +33,7 @@ test_unichar_validate (void)
   g_assert (g_unichar_validate ('j'));
   g_assert (g_unichar_validate (8356));
   g_assert (g_unichar_validate (8356));
-  g_assert (!g_unichar_validate (0xfdd1));
+  g_assert (g_unichar_validate (0xfdd1));
   g_assert (g_unichar_validate (917760));
   g_assert (!g_unichar_validate (0x110000));
 }
-- 
2.7.4