From f1f680b68c946a2d2f6af3c942ce1b4c034f1702 Mon Sep 17 00:00:00 2001 From: Owen Taylor Date: Sat, 7 Jul 2001 02:42:49 +0000 Subject: [PATCH] Add length arguments to g_utf8_{strup,strdown,casefold,collate_key}. Fri Jul 6 22:34:32 2001 Owen Taylor * glib/gunicode.h glib/gunidecomp.c glib/guniprop.c glib/gunicollate.c: Add length arguments to g_utf8_{strup,strdown,casefold,collate_key}. * glib/gdate.c: Fix for above. --- ChangeLog | 9 ++++++++- ChangeLog.pre-2-0 | 9 ++++++++- ChangeLog.pre-2-10 | 9 ++++++++- ChangeLog.pre-2-12 | 9 ++++++++- ChangeLog.pre-2-2 | 9 ++++++++- ChangeLog.pre-2-4 | 9 ++++++++- ChangeLog.pre-2-6 | 9 ++++++++- ChangeLog.pre-2-8 | 9 ++++++++- glib/gdate.c | 12 ++++++------ glib/gunicode.h | 13 +++++++++---- glib/gunicollate.c | 19 +++++++++++-------- glib/gunidecomp.c | 9 ++++++--- glib/guniprop.c | 42 +++++++++++++++++++++++++----------------- tests/unicode-caseconv.c | 6 +++--- tests/unicode-collate.c | 4 ++-- tests/unicode-normalize.c | 6 +++--- 16 files changed, 129 insertions(+), 54 deletions(-) diff --git a/ChangeLog b/ChangeLog index a475bac..83ccc0e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,15 @@ +Fri Jul 6 22:34:32 2001 Owen Taylor + + * glib/gunicode.h glib/gunidecomp.c glib/guniprop.c + glib/gunicollate.c: Add length arguments to + g_utf8_{strup,strdown,casefold,collate_key}. + + * glib/gdate.c: Fix for above. + 2001-07-06 Pablo Saratxaga * configure.in: added Basque (eu) to ALL_LINGUAS - Mon Jul 2 19:48:52 2001 Andrew Lanoix *giowin32.c: g_source_remove()ing an socket iochannel closes diff --git a/ChangeLog.pre-2-0 b/ChangeLog.pre-2-0 index a475bac..83ccc0e 100644 --- a/ChangeLog.pre-2-0 +++ b/ChangeLog.pre-2-0 @@ -1,8 +1,15 @@ +Fri Jul 6 22:34:32 2001 Owen Taylor + + * glib/gunicode.h glib/gunidecomp.c glib/guniprop.c + glib/gunicollate.c: Add length arguments to + g_utf8_{strup,strdown,casefold,collate_key}. + + * glib/gdate.c: Fix for above. + 2001-07-06 Pablo Saratxaga * configure.in: added Basque (eu) to ALL_LINGUAS - Mon Jul 2 19:48:52 2001 Andrew Lanoix *giowin32.c: g_source_remove()ing an socket iochannel closes diff --git a/ChangeLog.pre-2-10 b/ChangeLog.pre-2-10 index a475bac..83ccc0e 100644 --- a/ChangeLog.pre-2-10 +++ b/ChangeLog.pre-2-10 @@ -1,8 +1,15 @@ +Fri Jul 6 22:34:32 2001 Owen Taylor + + * glib/gunicode.h glib/gunidecomp.c glib/guniprop.c + glib/gunicollate.c: Add length arguments to + g_utf8_{strup,strdown,casefold,collate_key}. + + * glib/gdate.c: Fix for above. + 2001-07-06 Pablo Saratxaga * configure.in: added Basque (eu) to ALL_LINGUAS - Mon Jul 2 19:48:52 2001 Andrew Lanoix *giowin32.c: g_source_remove()ing an socket iochannel closes diff --git a/ChangeLog.pre-2-12 b/ChangeLog.pre-2-12 index a475bac..83ccc0e 100644 --- a/ChangeLog.pre-2-12 +++ b/ChangeLog.pre-2-12 @@ -1,8 +1,15 @@ +Fri Jul 6 22:34:32 2001 Owen Taylor + + * glib/gunicode.h glib/gunidecomp.c glib/guniprop.c + glib/gunicollate.c: Add length arguments to + g_utf8_{strup,strdown,casefold,collate_key}. + + * glib/gdate.c: Fix for above. + 2001-07-06 Pablo Saratxaga * configure.in: added Basque (eu) to ALL_LINGUAS - Mon Jul 2 19:48:52 2001 Andrew Lanoix *giowin32.c: g_source_remove()ing an socket iochannel closes diff --git a/ChangeLog.pre-2-2 b/ChangeLog.pre-2-2 index a475bac..83ccc0e 100644 --- a/ChangeLog.pre-2-2 +++ b/ChangeLog.pre-2-2 @@ -1,8 +1,15 @@ +Fri Jul 6 22:34:32 2001 Owen Taylor + + * glib/gunicode.h glib/gunidecomp.c glib/guniprop.c + glib/gunicollate.c: Add length arguments to + g_utf8_{strup,strdown,casefold,collate_key}. + + * glib/gdate.c: Fix for above. + 2001-07-06 Pablo Saratxaga * configure.in: added Basque (eu) to ALL_LINGUAS - Mon Jul 2 19:48:52 2001 Andrew Lanoix *giowin32.c: g_source_remove()ing an socket iochannel closes diff --git a/ChangeLog.pre-2-4 b/ChangeLog.pre-2-4 index a475bac..83ccc0e 100644 --- a/ChangeLog.pre-2-4 +++ b/ChangeLog.pre-2-4 @@ -1,8 +1,15 @@ +Fri Jul 6 22:34:32 2001 Owen Taylor + + * glib/gunicode.h glib/gunidecomp.c glib/guniprop.c + glib/gunicollate.c: Add length arguments to + g_utf8_{strup,strdown,casefold,collate_key}. + + * glib/gdate.c: Fix for above. + 2001-07-06 Pablo Saratxaga * configure.in: added Basque (eu) to ALL_LINGUAS - Mon Jul 2 19:48:52 2001 Andrew Lanoix *giowin32.c: g_source_remove()ing an socket iochannel closes diff --git a/ChangeLog.pre-2-6 b/ChangeLog.pre-2-6 index a475bac..83ccc0e 100644 --- a/ChangeLog.pre-2-6 +++ b/ChangeLog.pre-2-6 @@ -1,8 +1,15 @@ +Fri Jul 6 22:34:32 2001 Owen Taylor + + * glib/gunicode.h glib/gunidecomp.c glib/guniprop.c + glib/gunicollate.c: Add length arguments to + g_utf8_{strup,strdown,casefold,collate_key}. + + * glib/gdate.c: Fix for above. + 2001-07-06 Pablo Saratxaga * configure.in: added Basque (eu) to ALL_LINGUAS - Mon Jul 2 19:48:52 2001 Andrew Lanoix *giowin32.c: g_source_remove()ing an socket iochannel closes diff --git a/ChangeLog.pre-2-8 b/ChangeLog.pre-2-8 index a475bac..83ccc0e 100644 --- a/ChangeLog.pre-2-8 +++ b/ChangeLog.pre-2-8 @@ -1,8 +1,15 @@ +Fri Jul 6 22:34:32 2001 Owen Taylor + + * glib/gunicode.h glib/gunidecomp.c glib/guniprop.c + glib/gunicollate.c: Add length arguments to + g_utf8_{strup,strdown,casefold,collate_key}. + + * glib/gdate.c: Fix for above. + 2001-07-06 Pablo Saratxaga * configure.in: added Basque (eu) to ALL_LINGUAS - Mon Jul 2 19:48:52 2001 Andrew Lanoix *giowin32.c: g_source_remove()ing an socket iochannel closes diff --git a/glib/gdate.c b/glib/gdate.c index caab6fa..08b0334 100644 --- a/glib/gdate.c +++ b/glib/gdate.c @@ -515,8 +515,8 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt) gchar *casefold; gchar *normalized; - casefold = g_utf8_casefold (str); - normalized = g_utf8_normalize (casefold, G_NORMALIZE_ALL); + casefold = g_utf8_casefold (str, -1); + normalized = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL); g_free (casefold); i = 1; @@ -586,15 +586,15 @@ g_date_prepare_to_parse (const gchar *str, GDateParseTokens *pt) g_date_strftime (buf, 127, "%b", &d); - casefold = g_utf8_casefold (buf); + casefold = g_utf8_casefold (buf, -1); g_free (short_month_names[i]); - short_month_names[i] = g_utf8_normalize (casefold, G_NORMALIZE_ALL); + short_month_names[i] = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL); g_free (casefold); g_date_strftime (buf, 127, "%B", &d); - casefold = g_utf8_casefold (buf); + casefold = g_utf8_casefold (buf, -1); g_free (long_month_names[i]); - long_month_names[i] = g_utf8_normalize (casefold, G_NORMALIZE_ALL); + long_month_names[i] = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL); g_free (casefold); ++i; diff --git a/glib/gunicode.h b/glib/gunicode.h index a4af1f7..b7f5e34 100644 --- a/glib/gunicode.h +++ b/glib/gunicode.h @@ -247,9 +247,12 @@ gboolean g_utf8_validate (const gchar *str, /* Validate a Unicode character */ gboolean g_unichar_validate (gunichar ch); -gchar *g_utf8_strup (const gchar *str); -gchar *g_utf8_strdown (const gchar *str); -gchar *g_utf8_casefold (const gchar *str); +gchar *g_utf8_strup (const gchar *str, + gssize len); +gchar *g_utf8_strdown (const gchar *str, + gssize len); +gchar *g_utf8_casefold (const gchar *str, + gssize len); typedef enum { G_NORMALIZE_DEFAULT, @@ -263,11 +266,13 @@ typedef enum { } GNormalizeMode; gchar *g_utf8_normalize (const gchar *str, + gssize len, GNormalizeMode mode); gint g_utf8_collate (const gchar *str1, const gchar *str2); -gchar *g_utf8_collate_key (const gchar *str); +gchar *g_utf8_collate_key (const gchar *str, + gssize len); G_END_DECLS diff --git a/glib/gunicollate.c b/glib/gunicollate.c index bcda032..5c38836 100644 --- a/glib/gunicollate.c +++ b/glib/gunicollate.c @@ -27,6 +27,7 @@ #include "glib.h" extern gunichar *_g_utf8_normalize_wc (const gchar *str, + gssize max_len, GNormalizeMode mode); /** @@ -52,8 +53,8 @@ g_utf8_collate (const gchar *str1, #ifdef __STDC_ISO_10646__ - gunichar *str1_norm = _g_utf8_normalize_wc (str1, G_NORMALIZE_ALL_COMPOSE); - gunichar *str2_norm = _g_utf8_normalize_wc (str2, G_NORMALIZE_ALL_COMPOSE); + gunichar *str1_norm = _g_utf8_normalize_wc (str1, -1, G_NORMALIZE_ALL_COMPOSE); + gunichar *str2_norm = _g_utf8_normalize_wc (str2, -1, G_NORMALIZE_ALL_COMPOSE); result = wcscoll ((wchar_t *)str1_norm, (wchar_t *)str2_norm); @@ -63,8 +64,8 @@ g_utf8_collate (const gchar *str1, #else /* !__STDC_ISO_10646__ */ const gchar *charset; - gchar *str1_norm = g_utf8_normalize (str1, G_NORMALIZE_ALL_COMPOSE); - gchar *str2_norm = g_utf8_normalize (str2, G_NORMALIZE_ALL_COMPOSE); + gchar *str1_norm = g_utf8_normalize (str1, -1, G_NORMALIZE_ALL_COMPOSE); + gchar *str2_norm = g_utf8_normalize (str2, -1, G_NORMALIZE_ALL_COMPOSE); if (g_get_charset (&charset)) { @@ -148,7 +149,8 @@ utf8_encode (char *buf, wchar_t val) /** * g_utf8_collate_key: * @str: a UTF-8 encoded string. - * + * @len: length of @str, in bytes, or -1 if @str is nul-terminated. + * * Converts a string into a collation key that can be compared * with other collation keys using strcmp(). The results of * comparing the collation keys of two strings with strcmp() @@ -159,14 +161,15 @@ utf8_encode (char *buf, wchar_t val) * be freed with g_free when you are done with it. **/ gchar * -g_utf8_collate_key (const gchar *str) +g_utf8_collate_key (const gchar *str, + gssize len) { gchar *result; size_t len; #ifdef __STDC_ISO_10646__ - gunichar *str_norm = _g_utf8_normalize_wc (str, G_NORMALIZE_ALL_COMPOSE); + gunichar *str_norm = _g_utf8_normalize_wc (str, len, G_NORMALIZE_ALL_COMPOSE); wchar_t *result_wc; size_t i; size_t result_len = 0; @@ -194,7 +197,7 @@ g_utf8_collate_key (const gchar *str) #else /* !__STDC_ISO_10646__ */ const gchar *charset; - gchar *str_norm = g_utf8_normalize (str, G_NORMALIZE_ALL_COMPOSE); + gchar *str_norm = g_utf8_normalize (str, len, G_NORMALIZE_ALL_COMPOSE); if (g_get_charset (&charset)) { diff --git a/glib/gunidecomp.c b/glib/gunidecomp.c index 618b75d..6af9320 100644 --- a/glib/gunidecomp.c +++ b/glib/gunidecomp.c @@ -218,6 +218,7 @@ combine (gunichar a, gunichar * _g_utf8_normalize_wc (const gchar *str, + gssize max_len, GNormalizeMode mode) { gsize n_wc; @@ -231,7 +232,7 @@ _g_utf8_normalize_wc (const gchar *str, n_wc = 0; p = str; - while (*p) + while ((max_len < 0 || p < str + max_len) && *p) { gunichar wc = g_utf8_get_char (p); @@ -257,7 +258,7 @@ _g_utf8_normalize_wc (const gchar *str, last_start = 0; n_wc = 0; p = str; - while (*p) + while ((max_len < 0 || p < str + max_len) && *p) { gunichar wc = g_utf8_get_char (p); guchar *decomp; @@ -345,6 +346,7 @@ _g_utf8_normalize_wc (const gchar *str, /** * g_utf8_normalize: * @str: a UTF-8 encoded string. + * @len: length of @str, in bytes, or -1 if @str is nul-terminated. * @mode: the type of normalization to perform. * * Convert a string into canonical form, standardizing @@ -378,9 +380,10 @@ _g_utf8_normalize_wc (const gchar *str, **/ gchar * g_utf8_normalize (const gchar *str, + gssize len, GNormalizeMode mode) { - gunichar *result_wc = _g_utf8_normalize_wc (str, mode); + gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode); gchar *result; result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL); diff --git a/glib/guniprop.c b/glib/guniprop.c index aa2c68a..b5f998b 100644 --- a/glib/guniprop.c +++ b/glib/guniprop.c @@ -588,6 +588,7 @@ output_special_case (gchar *out_buffer, static gsize real_toupper (const gchar *str, + gssize max_len, gchar *out_buffer, LocaleType locale_type) { @@ -596,7 +597,7 @@ real_toupper (const gchar *str, gsize len = 0; gboolean last_was_i = FALSE; - while (*p) + while ((max_len < 0 || p < str + max_len) && *p) { gunichar c = g_utf8_get_char (p); int t = TYPE (c); @@ -693,8 +694,9 @@ real_toupper (const gchar *str, } /** - * g_ut8f_strup: - * @string: a UTF-8 encoded string + * g_utf8_strup: + * @str: a UTF-8 encoded string + * @len: length of @str, in bytes, or -1 if @str is nul-terminated. * * Converts all Unicode characters in the string that have a case * to uppercase. The exact manner that this is done depends @@ -706,7 +708,8 @@ real_toupper (const gchar *str, * converted to uppercase. **/ gchar * -g_utf8_strup (const gchar *str) +g_utf8_strup (const gchar *str, + gssize len) { gsize len; LocaleType locale_type; @@ -719,9 +722,9 @@ g_utf8_strup (const gchar *str) /* * We use a two pass approach to keep memory management simple */ - len = real_toupper (str, NULL, locale_type); + len = real_toupper (str, len, NULL, locale_type); result = g_malloc (len + 1); - real_toupper (str, result, locale_type); + real_toupper (str, len, result, locale_type); result[len] = '\0'; return result; @@ -729,6 +732,7 @@ g_utf8_strup (const gchar *str) static gsize real_tolower (const gchar *str, + gssize max_len, gchar *out_buffer, LocaleType locale_type) { @@ -736,7 +740,7 @@ real_tolower (const gchar *str, const char *last = NULL; gsize len = 0; - while (*p) + while ((max_len < 0 || p < str + max_len) && *p) { gunichar c = g_utf8_get_char (p); int t = TYPE (c); @@ -807,8 +811,9 @@ real_tolower (const gchar *str, } /** - * g_ut8f_strdown: - * @string: a UTF-8 encoded string + * g_utf8_strdown: + * @str: a UTF-8 encoded string + * @len: length of @str, in bytes, or -1 if @str is nul-terminated. * * Converts all Unicode characters in the string that have a case * to lowercase. The exact manner that this is done depends @@ -819,7 +824,8 @@ real_tolower (const gchar *str, * converted to lowercase. **/ gchar * -g_utf8_strdown (const gchar *str) +g_utf8_strdown (const gchar *str, + gssize len) { gsize len; LocaleType locale_type; @@ -832,9 +838,9 @@ g_utf8_strdown (const gchar *str) /* * We use a two pass approach to keep memory management simple */ - len = real_tolower (str, NULL, locale_type); + len = real_tolower (str, len, NULL, locale_type); result = g_malloc (len + 1); - real_tolower (str, result, locale_type); + real_tolower (str, len, result, locale_type); result[len] = '\0'; return result; @@ -843,6 +849,7 @@ g_utf8_strdown (const gchar *str) /** * g_utf8_casefold: * @str: a UTF-8 encoded string + * @len: length of @str, in bytes, or -1 if @str is nul-terminated. * * Converts a string into a form that is independent of case. The * result will not correspond to any particular case, but can be @@ -860,15 +867,16 @@ g_utf8_strdown (const gchar *str) * case independent form of @str. **/ gchar * -g_utf8_casefold (const gchar *str) +g_utf8_casefold (const gchar *str, + gssize len) { GString *result = g_string_new (NULL); const char *p; gchar buf[6]; - int len; + int charlen; p = str; - while (*p) + while ((len < 0 || p < str + len) && *p) { gunichar ch = g_utf8_get_char (p); @@ -896,8 +904,8 @@ g_utf8_casefold (const gchar *str) } ch = g_unichar_tolower (ch); - len = g_unichar_to_utf8 (ch, buf); - g_string_append_len (result, buf, len); + charlen = g_unichar_to_utf8 (ch, buf); + g_string_append_len (result, buf, charlen); next: p = g_utf8_next_char (p); diff --git a/tests/unicode-caseconv.c b/tests/unicode-caseconv.c index 6b65a70..65836e1 100644 --- a/tests/unicode-caseconv.c +++ b/tests/unicode-caseconv.c @@ -54,7 +54,7 @@ int main (int argc, char **argv) test = strings[1]; - convert = g_utf8_strup (test); + convert = g_utf8_strup (test, -1); if (strcmp (convert, strings[4]) != 0) { fprintf (stderr, "Failure: toupper(%s) == %s, should have been %s\n", @@ -63,7 +63,7 @@ int main (int argc, char **argv) } g_free (convert); - convert = g_utf8_strdown (test); + convert = g_utf8_strdown (test, -1); if (strcmp (convert, strings[2]) != 0) { fprintf (stderr, "Failure: tolower(%s) == %s, should have been %s\n", @@ -98,7 +98,7 @@ int main (int argc, char **argv) test = strings[0]; - convert = g_utf8_casefold (test); + convert = g_utf8_casefold (test, -1); if (strcmp (convert, strings[1]) != 0) { fprintf (stderr, "Failure: casefold(%s) == '%s', should have been '%s'\n", diff --git a/tests/unicode-collate.c b/tests/unicode-collate.c index 44cedb5..0ed2735 100644 --- a/tests/unicode-collate.c +++ b/tests/unicode-collate.c @@ -41,7 +41,7 @@ int main (int argc, char **argv) if (argc == 2) { - in = g_io_channel_new_file (argv[1], G_IO_FILE_MODE_READ, &error); + in = g_io_channel_new_file (argv[1], "r", &error); if (!in) { fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message); @@ -64,7 +64,7 @@ int main (int argc, char **argv) str[term_pos] = '\0'; - line.key = g_utf8_collate_key (str); + line.key = g_utf8_collate_key (str, -1); line.str = str; g_array_append_val (line_array, line); diff --git a/tests/unicode-normalize.c b/tests/unicode-normalize.c index 0bd80df..da6c333 100644 --- a/tests/unicode-normalize.c +++ b/tests/unicode-normalize.c @@ -67,7 +67,7 @@ test_form (int line, { for (i = 0; i < 3; i++) { - char *result = g_utf8_normalize (c[i], mode); + char *result = g_utf8_normalize (c[i], -1, mode); if (strcmp (result, c[expected]) != 0) { fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i + 1, raw[5]); @@ -83,7 +83,7 @@ test_form (int line, { for (i = 3; i < 5; i++) { - char *result = g_utf8_normalize (c[i], mode); + char *result = g_utf8_normalize (c[i], -1, mode); if (strcmp (result, c[expected]) != 0) { fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i, raw[5]); @@ -144,7 +144,7 @@ int main (int argc, char **argv) if (argc == 3) line_to_do = atoi(argv[2]); - in = g_io_channel_new_file (argv[1], G_IO_FILE_MODE_READ, &error); + in = g_io_channel_new_file (argv[1], "r", &error); if (!in) { fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message); -- 2.7.4