From: Matthias Clasen <mclasen@redhat.com>
Date: Thu, 9 Aug 2007 02:06:04 +0000 (+0000)
Subject: Handle restricted characters by converting them to numeric character
X-Git-Tag: GLIB_2_14_1~85
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=28f781501e4c9a7f83b459d83f14c396fce0c981;p=platform%2Fupstream%2Fglib.git

Handle restricted characters by converting them to numeric character entities

2007-08-08  Matthias Clasen  <mclasen@redhat.com>

        * glib/gmarkup.c (append_escaped_text): Handle restricted
        characters by converting them to numeric character
        entities.  (#464145, Andreas Monitzer)

        * tests/markup-escape-test.c: Add tests for restricted
        characters and numeric character entities.


svn path=/trunk/; revision=5684
---

diff --git a/ChangeLog b/ChangeLog
index b945832..fa10f19 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
 2007-08-08  Matthias Clasen  <mclasen@redhat.com>
 
+	* glib/gmarkup.c (append_escaped_text): Handle restricted
+	characters by converting them to numeric character 
+	entities.  (#464145, Andreas Monitzer)
+
+	* tests/markup-escape-test.c: Add tests for restricted
+	characters and numeric character entities. 
+
+2007-08-08  Matthias Clasen  <mclasen@redhat.com>
+
 	* glib/glib.symbols:
 	* glib/Makefile.am:
 	* glib/abicheck.sh: Make it work regardless of --enable-debug
diff --git a/glib/gmarkup.c b/glib/gmarkup.c
index e0179f3..4c94159 100644
--- a/glib/gmarkup.c
+++ b/glib/gmarkup.c
@@ -955,7 +955,7 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
               set_error (context,
                          error,
                          G_MARKUP_ERROR_BAD_UTF8,
-                         _("Invalid UTF-8 encoded text"));
+                         _("Invalid UTF-8 encoded text - overlong sequence"));
             }
           
           goto finished;
@@ -983,7 +983,7 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
       set_error (context,
                  error,
                  G_MARKUP_ERROR_BAD_UTF8,
-                 _("Invalid UTF-8 encoded text"));
+                 _("Invalid UTF-8 encoded text - not a start char"));
       goto finished;
     }
 
@@ -1019,7 +1019,9 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
       set_error (context,
                  error,
                  G_MARKUP_ERROR_BAD_UTF8,
-                 _("Invalid UTF-8 encoded text"));
+                 _("Invalid UTF-8 encoded text - not valid '%s'"),
+                 g_strndup (context->current_text,
+                            context->current_text_len));
       goto finished;
     }
 
@@ -1900,6 +1902,7 @@ append_escaped_text (GString     *str,
 {
   const gchar *p;
   const gchar *end;
+  gunichar c;
 
   p = text;
   end = text + length;
@@ -1932,7 +1935,15 @@ append_escaped_text (GString     *str,
           break;
 
         default:
-          g_string_append_len (str, p, next - p);
+          c = g_utf8_get_char (p);
+          if ((0x1 <= c && c <= 0x8) ||
+              (0xb <= c && c  <= 0xc) ||
+              (0xe <= c && c <= 0x1f) ||
+              (0x7f <= c && c <= 0x84) ||
+              (0x86 <= c && c <= 0x9f))
+            g_string_append_printf (str, "&#x%x;", c);
+          else
+            g_string_append_len (str, p, next - p);
           break;
         }
 
diff --git a/tests/markup-escape-test.c b/tests/markup-escape-test.c
index 667d4dc..2734162 100644
--- a/tests/markup-escape-test.c
+++ b/tests/markup-escape-test.c
@@ -27,6 +27,24 @@ test (const gchar *original,
 }
 
 static void
+test_unichar (gunichar c, 
+              gboolean entity)
+{
+  gint len;
+  gchar outbuf[7], expected[12];
+
+  len = g_unichar_to_utf8 (c, outbuf);
+  outbuf[len] = 0;
+
+  if (entity)
+    g_snprintf (expected, 12, "&#x%x;", c);
+  else
+    strcpy (expected, outbuf);
+
+  test (outbuf, expected);
+}
+
+static void
 test_format (const gchar *format,
 	     const gchar *expected,
 	     ...)
@@ -67,6 +85,25 @@ int main (int argc, char **argv)
   test ("A&&", "A&amp;&amp;");
   test ("A&&A", "A&amp;&amp;A");
   test ("A&A&A", "A&amp;A&amp;A");
+  test ("A&#23;A", "A&amp;#23;A");
+  test ("A&#xa;A", "A&amp;#xa;A");
+  test_unichar (0x1, TRUE);
+  test_unichar (0x8, TRUE);
+  test_unichar (0x9, FALSE);
+  test_unichar (0xa, FALSE);
+  test_unichar (0xb, TRUE);
+  test_unichar (0xc, TRUE);
+  test_unichar (0xd, FALSE);
+  test_unichar (0xe, TRUE);
+  test_unichar (0x1f, TRUE);
+  test_unichar (0x20, FALSE);
+  test_unichar (0x7e, FALSE);
+  test_unichar (0x7f, TRUE);
+  test_unichar (0x84, TRUE);
+  test_unichar (0x85, FALSE);
+  test_unichar (0x86, TRUE);
+  test_unichar (0x9f, TRUE);
+  test_unichar (0xa0, FALSE);
   
   /* Tests for g_markup_printf_escaped() */
   test_format ("A", "A");