From: Matthias Clasen Date: Thu, 17 Nov 2005 15:50:31 +0000 (+0000) Subject: Unit tests for g_utf8_pointer_to_offset and g_utf8_offset_to_pointer. X-Git-Tag: GLIB_2_9_0~4 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1ee0917984152f9fe09b33a3660ba96cec0b55b1;p=platform%2Fupstream%2Fglib.git Unit tests for g_utf8_pointer_to_offset and g_utf8_offset_to_pointer. 2005-11-17 Matthias Clasen * tests/Makefile.am: * tests/utf8-pointer.c: Unit tests for g_utf8_pointer_to_offset and g_utf8_offset_to_pointer. * glib/gutf8.c (g_utf8_pointer_to_offset) (g_utf8_offset_to_pointer): Handle negative offsets, and use "stutter stepping" for going backwards. (#320638, Larry Ewing) --- diff --git a/ChangeLog b/ChangeLog index c6e93d6..ba16472 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,14 @@ 2005-11-17 Matthias Clasen + * tests/Makefile.am: + * tests/utf8-pointer.c: Unit tests for g_utf8_pointer_to_offset + and g_utf8_offset_to_pointer. + + * glib/gutf8.c (g_utf8_pointer_to_offset) + (g_utf8_offset_to_pointer): Handle negative offsets, and use + "stutter stepping" for going backwards. (#320638, Larry + Ewing) + * glib/gbacktrace.c: * glib/gdate.c: * glib/gthread.c: const correctness fixes, found diff --git a/ChangeLog.pre-2-10 b/ChangeLog.pre-2-10 index c6e93d6..ba16472 100644 --- a/ChangeLog.pre-2-10 +++ b/ChangeLog.pre-2-10 @@ -1,5 +1,14 @@ 2005-11-17 Matthias Clasen + * tests/Makefile.am: + * tests/utf8-pointer.c: Unit tests for g_utf8_pointer_to_offset + and g_utf8_offset_to_pointer. + + * glib/gutf8.c (g_utf8_pointer_to_offset) + (g_utf8_offset_to_pointer): Handle negative offsets, and use + "stutter stepping" for going backwards. 
(#320638, Larry + Ewing) + * glib/gbacktrace.c: * glib/gdate.c: * glib/gthread.c: const correctness fixes, found diff --git a/ChangeLog.pre-2-12 b/ChangeLog.pre-2-12 index c6e93d6..ba16472 100644 --- a/ChangeLog.pre-2-12 +++ b/ChangeLog.pre-2-12 @@ -1,5 +1,14 @@ 2005-11-17 Matthias Clasen + * tests/Makefile.am: + * tests/utf8-pointer.c: Unit tests for g_utf8_pointer_to_offset + and g_utf8_offset_to_pointer. + + * glib/gutf8.c (g_utf8_pointer_to_offset) + (g_utf8_offset_to_pointer): Handle negative offsets, and use + "stutter stepping" for going backwards. (#320638, Larry + Ewing) + * glib/gbacktrace.c: * glib/gdate.c: * glib/gthread.c: const correctness fixes, found diff --git a/glib/gutf8.c b/glib/gutf8.c index 612264e..4f7d486 100644 --- a/glib/gutf8.c +++ b/glib/gutf8.c @@ -284,6 +284,9 @@ g_utf8_get_char (const gchar *p) * Converts from an integer character offset to a pointer to a position * within the string. * + * Since 2.10, this function allows to pass a negative @offset to + * step backwards. + * * Return value: the resulting pointer **/ gchar * @@ -291,9 +294,29 @@ g_utf8_offset_to_pointer (const gchar *str, glong offset) { const gchar *s = str; - while (offset--) - s = g_utf8_next_char (s); - + + if (offset > 0) + while (offset--) + s = g_utf8_next_char (s); + else + { + const char *s1; + + /* This nice technique for fast backwards stepping + * through a UTF-8 string was dubbed "stutter stepping" + * by its inventor, Larry Ewing. + */ + while (offset) + { + s1 = s; + s += offset; + while ((*s & 0xc0) == 0x80) + s--; + + offset += g_utf8_pointer_to_offset (s, s1); + } + } + return (gchar *)s; } @@ -304,6 +327,9 @@ g_utf8_offset_to_pointer (const gchar *str, * * Converts from a pointer to position within a string to a integer * character offset. + * + * Since 2.10, this function allows @pos to be before @str, and returns + * a negative offset in this case. 
* * Return value: the resulting character offset **/ @@ -313,13 +339,16 @@ g_utf8_pointer_to_offset (const gchar *str, { const gchar *s = str; glong offset = 0; - - while (s < pos) - { - s = g_utf8_next_char (s); - offset++; - } + if (pos < str) + offset = - g_utf8_pointer_to_offset (pos, str); + else + while (s < pos) + { + s = g_utf8_next_char (s); + offset++; + } + return offset; } diff --git a/tests/Makefile.am b/tests/Makefile.am index a5336e4..3b61a2c 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -103,6 +103,7 @@ test_programs = \ unicode-caseconv \ unicode-encoding \ utf8-validate \ + utf8-pointer \ uri-test test_scripts = run-markup-tests.sh run-collate-tests.sh @@ -161,6 +162,7 @@ unicode_encoding_LDADD = $(progs_ldadd) unicode_caseconv_LDADD = $(progs_ldadd) unicode_collate_LDADD = $(progs_ldadd) utf8_validate_LDADD = $(progs_ldadd) +utf8_pointer_LDADD = $(progs_ldadd) uri_test_LDADD = $(progs_ldadd) lib_LTLIBRARIES = libmoduletestplugin_a.la libmoduletestplugin_b.la diff --git a/tests/utf8-pointer.c b/tests/utf8-pointer.c new file mode 100644 index 0000000..3072851 --- /dev/null +++ b/tests/utf8-pointer.c @@ -0,0 +1,72 @@ +/* GLIB - Library of useful routines for C programming + * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * Modified by the GLib Team and others 1997-2000. See the AUTHORS + * file for a list of people on the GLib Team. See the ChangeLog + * files for a list of changes. These files are distributed with + * GLib at ftp://ftp.gtk.org/pub/gtk/. + */ + +#include <glib.h> + +/* Test conversions between offsets and pointers */ + +static void test_utf8 (gchar *string) +{ + gint num_chars; + gchar **p; + gint i, j; + + g_assert (g_utf8_validate (string, -1, NULL)); + + num_chars = g_utf8_strlen (string, -1); + + p = (gchar **) g_malloc (num_chars * sizeof (gchar *)); + + p[0] = string; + for (i = 1; i < num_chars; i++) + p[i] = g_utf8_next_char (p[i-1]); + + for (i = 0; i < num_chars; i++) + for (j = 0; j < num_chars; j++) + { + g_assert (g_utf8_offset_to_pointer (p[i], j - i) == p[j]); + g_assert (g_utf8_pointer_to_offset (p[i], p[j]) == j - i); + } + + g_free (p); +} + +gchar *longline = "asdasdas dsaf asfd as fdasdf asfd asdf as dfas dfasdf a" +"asd fasdf asdf asdf asd fasfd as fdasfd asdf as fdççççççççças ffsd asfd as fdASASASAs As" +"Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfg sdfg sdf gsdfg sdfg sd" +"asd fasdf asdf asdf asd fasfd as fdaèèèèèèè òòòòòòòòòòòòsfd asdf as fdas ffsd asfd as fdASASASAs D" +"Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfgùùùùùùùùùùùùùù sdfg sdf gsdfg sdfg sd" +"asd fasdf asdf asdf asd fasfd as fdasfd asd@@@@@@@f as fdas ffsd asfd as fdASASASAs D " +"Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdf€€€€€€€€€€€€€€€€€€g sdfg sdfg sdf gsdfg sdfg sd" +"asd fasdf asdf asdf asd fasfd as fdasfd asdf as fdas ffsd asfd as fdASASASAs D" +"Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfg sdfg sdf gsdfg sdfg sd\n\nlalala\n"; + +int main (int argc, char *argv[]) +{ + test_utf8 (longline); + + return
0; +}