Accept non-characters when validating Unicode

author Simon McVittie <simon.mcvittie@collabora.co.uk>

Mon, 22 Apr 2013 14:36:32 +0000 (15:36 +0100)

committer Simon McVittie <simon.mcvittie@collabora.co.uk>

Mon, 22 Apr 2013 14:36:32 +0000 (15:36 +0100)
author Simon McVittie <simon.mcvittie@collabora.co.uk>
Mon, 22 Apr 2013 14:36:32 +0000 (15:36 +0100)
committer Simon McVittie <simon.mcvittie@collabora.co.uk>
Mon, 22 Apr 2013 14:36:32 +0000 (15:36 +0100)
diff --git a/dbus/dbus-string.c b/dbus/dbus-string.c

index 9accdb1..e3766aa 100644 (file)
--- a/dbus/dbus-string.c
+++ b/dbus/dbus-string.c
@@ -1577,19 +1577,11 @@ _dbus_string_split_on_byte (DBusString        *source,
   *
   * The second check covers surrogate pairs (category Cs).
   *
- * The last two checks cover "Noncharacter": defined as:
- *   "A code point that is permanently reserved for
- *    internal use, and that should never be interchanged. In
- *    Unicode 3.1, these consist of the values U+nFFFE and U+nFFFF
- *    (where n is from 0 to 10_16) and the values U+FDD0..U+FDEF."
- *
   * @param Char the character
   */
  #define UNICODE_VALID(Char)                   \
      ((Char) < 0x110000 &&                     \
-     (((Char) & 0xFFFFF800) != 0xD800) &&     \
-     ((Char) < 0xFDD0 || (Char) > 0xFDEF) &&  \
-     ((Char) & 0xFFFE) != 0xFFFE)
+     (((Char) & 0xFFFFF800) != 0xD800))
  
  /**
   * Finds the given substring in the string,
diff --git a/test/syntax.c b/test/syntax.c

index 88db963..e26b364 100644 (file)
--- a/test/syntax.c
+++ b/test/syntax.c
@@ -178,12 +178,14 @@ const char * const invalid_single_signatures[] = {
  
  const char * const valid_strings[] = {
      "",
-    "\xc2\xa9",
+    "\xc2\xa9",       /* UTF-8 (c) symbol */
+    "\xef\xbf\xbe",   /* U+FFFE is reserved but Corrigendum 9 says it's OK */
      NULL
  };
  
  const char * const invalid_strings[] = {
-    "\xa9",
+    "\xa9",           /* Latin-1 (c) symbol */
+    "\xed\xa0\x80",   /* UTF-16 surrogates are not valid in UTF-8 */
      NULL
  };
author	Simon McVittie <simon.mcvittie@collabora.co.uk>
	Mon, 22 Apr 2013 14:36:32 +0000 (15:36 +0100)
committer	Simon McVittie <simon.mcvittie@collabora.co.uk>
	Mon, 22 Apr 2013 14:36:32 +0000 (15:36 +0100)
dbus/dbus-string.c		patch | blob | history
test/syntax.c		patch | blob | history