From: Ryan Lortie
Date: Thu, 3 Jun 2010 07:41:33 +0000 (+0200)
Subject: Bug 620349 – utf8ify GVariant printer
X-Git-Tag: 2.25.8~41
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=44db2b6b7447680fa3f8d3bce6f2bda26a6b498e;p=platform%2Fupstream%2Fglib.git

Bug 620349 – utf8ify GVariant printer

Take advantage of our knowledge that GVariant strings are always valid
utf8 when printing and parsing:

  - allow valid printing unicode characters to pass through unescaped

  - escape non-printing characters using \uxxxx or \Uxxxxxxxx format

  - do the same in the parser

  - update existing test cases to use utf8, add a new test case
---

diff --git a/glib/gvariant-parser.c b/glib/gvariant-parser.c
index 1020633..9bca95f 100644
--- a/glib/gvariant-parser.c
+++ b/glib/gvariant-parser.c
@@ -1409,6 +1409,42 @@ string_free (AST *ast)
   g_slice_free (String, string);
 }
 
+static gboolean
+unicode_unescape (const gchar *src,
+                  gint *src_ofs,
+                  gchar *dest,
+                  gint *dest_ofs,
+                  gint length,
+                  SourceRef *ref,
+                  GError **error)
+{
+  gchar buffer[9];
+  guint64 value;
+  gchar *end;
+
+  (*src_ofs)++;
+
+  g_assert (length < sizeof (buffer));
+  strncpy (buffer, src + *src_ofs, length);
+  buffer[length] = '\0';
+
+  value = g_ascii_strtoull (buffer, &end, 0x10);
+
+  if (value == 0 || end != buffer + length)
+    {
+      parser_set_error (error, ref, NULL,
+                        "invalid %d-character unicode escape", length);
+      return FALSE;
+    }
+
+  g_assert (value <= G_MAXUINT32);
+
+  *dest_ofs += g_unichar_to_utf8 (value, dest + *dest_ofs);
+  *src_ofs += length;
+
+  return TRUE;
+}
+
 static AST *
 string_parse (TokenStream *stream,
               va_list *app,
@@ -1455,27 +1491,29 @@ string_parse (TokenStream *stream,
             g_free (token);
             return NULL;
 
-          case '0': case '1': case '2': case '3':
-          case '4': case '5': case '6': case '7':
-            {
-              /* up to 3 characters */
-              guchar val = token[i++] - '0';
-
-              if ('0' <= token[i] && token[i] < '8')
-                val = (val << 3) | (token[i++] - '0');
-
-              if ('0' <= token[i] && token[i] < '8')
-                val = (val << 3) | (token[i++] - '0');
+          case 'u':
+            if (!unicode_unescape (token, &i, str, &j, 4, &ref, error))
+              {
+                g_free (token);
+                return NULL;
+              }
+            continue;
 
-              str[j++] = val;
-            }
+          case 'U':
+            if (!unicode_unescape (token, &i, str, &j, 8, &ref, error))
+              {
+                g_free (token);
+                return NULL;
+              }
             continue;
 
+          case 'a': str[j++] = '\a'; i++; continue;
           case 'b': str[j++] = '\b'; i++; continue;
           case 'f': str[j++] = '\f'; i++; continue;
           case 'n': str[j++] = '\n'; i++; continue;
           case 'r': str[j++] = '\r'; i++; continue;
           case 't': str[j++] = '\t'; i++; continue;
+          case 'v': str[j++] = '\v'; i++; continue;
           case '\n': i++; continue;
           }
 
diff --git a/glib/gvariant.c b/glib/gvariant.c
index 9dca0d0..47b6fbe 100644
--- a/glib/gvariant.c
+++ b/glib/gvariant.c
@@ -1716,15 +1716,66 @@ g_variant_print_string (GVariant *value,
     case G_VARIANT_CLASS_STRING:
       {
         const gchar *str = g_variant_get_string (value, NULL);
-        gchar *escaped = g_strescape (str, NULL);
+        gunichar quote = strchr (str, '\'') ? '"' : '\'';
 
-        /* use double quotes only if a ' is in the string */
-        if (strchr (str, '\''))
-          g_string_append_printf (string, "\"%s\"", escaped);
-        else
-          g_string_append_printf (string, "'%s'", escaped);
+        g_string_append_c (string, quote);
+
+        while (*str)
+          {
+            gunichar c = g_utf8_get_char (str);
+
+            if (c == quote || c == '\\')
+              g_string_append_c (string, '\\');
+
+            if (g_unichar_isprint (c))
+              g_string_append_unichar (string, c);
+
+            else
+              {
+                g_string_append_c (string, '\\');
+                if (c < 0x10000)
+                  switch (c)
+                    {
+                    case '\a':
+                      g_string_append_c (string, 'a');
+                      break;
+
+                    case '\b':
+                      g_string_append_c (string, 'b');
+                      break;
+
+                    case '\f':
+                      g_string_append_c (string, 'f');
+                      break;
+
+                    case '\n':
+                      g_string_append_c (string, 'n');
+                      break;
+
+                    case '\r':
+                      g_string_append_c (string, 'r');
+                      break;
+
+                    case '\t':
+                      g_string_append_c (string, 't');
+                      break;
+
+                    case '\v':
+                      g_string_append_c (string, 'v');
+                      break;
+
+                    default:
+                      g_string_append_printf (string, "u%04x", c);
+                      break;
+                    }
+                else
+                  g_string_append_printf (string, "U%08x", c);
+              }
+
+            str = g_utf8_next_char (str);
+          }
 
-        g_free (escaped);
+        g_string_append_c (string, quote);
       }
       break;
 
diff --git a/glib/tests/gvariant.c b/glib/tests/gvariant.c
index 020a3fd..68c6ec4 100644
--- a/glib/tests/gvariant.c
+++ b/glib/tests/gvariant.c
@@ -3589,18 +3589,20 @@ test_parses (void)
 
   /* mini test */
   {
-    gchar str[256];
+    GError *error = NULL;
+    gchar str[128];
     GVariant *val;
     gchar *p, *p2;
 
-    for (i = 0; i < 256; i++)
+    for (i = 0; i < 127; i++)
       str[i] = i + 1;
+    str[i] = 0;
 
     val = g_variant_new_string (str);
     p = g_variant_print (val, FALSE);
     g_variant_unref (val);
 
-    val = g_variant_parse (NULL, p, NULL, NULL, NULL);
+    val = g_variant_parse (NULL, p, NULL, NULL, &error);
     p2 = g_variant_print (val, FALSE);
 
     g_assert_cmpstr (str, ==, g_variant_get_string (val, NULL));
@@ -3623,6 +3625,24 @@ test_parses (void)
     g_variant_unref (value);
   }
 
+  /* unicode mini test */
+  {
+    /* ał𝄞 */
+    const gchar orig[] = "a\xc5\x82\xf0\x9d\x84\x9e \t\n";
+    GVariant *value;
+    gchar *printed;
+
+    value = g_variant_new_string (orig);
+    printed = g_variant_print (value, FALSE);
+    g_variant_unref (value);
+
+    g_assert_cmpstr (printed, ==, "'a\xc5\x82\xf0\x9d\x84\x9e \\t\\n'");
+    value = g_variant_parse (NULL, printed, NULL, NULL, NULL);
+    g_assert_cmpstr (g_variant_get_string (value, NULL), ==, orig);
+    g_variant_unref (value);
+    g_free (printed);
+  }
+
   g_variant_type_info_assert_no_infos ();
 }
 