2005-11-17 Matthias Clasen <mclasen@redhat.com>
+ * tests/Makefile.am:
+ * tests/utf8-pointer.c: Unit tests for g_utf8_pointer_to_offset
+ and g_utf8_offset_to_pointer.
+
+ * glib/gutf8.c (g_utf8_pointer_to_offset)
+ (g_utf8_offset_to_pointer): Handle negative offsets, and use
+ "stutter stepping" for going backwards. (#320638, Larry
+ Ewing)
+
* glib/gbacktrace.c:
* glib/gdate.c:
* glib/gthread.c: const correctness fixes, found
2005-11-17 Matthias Clasen <mclasen@redhat.com>
+ * tests/Makefile.am:
+ * tests/utf8-pointer.c: Unit tests for g_utf8_pointer_to_offset
+ and g_utf8_offset_to_pointer.
+
+ * glib/gutf8.c (g_utf8_pointer_to_offset)
+ (g_utf8_offset_to_pointer): Handle negative offsets, and use
+ "stutter stepping" for going backwards. (#320638, Larry
+ Ewing)
+
* glib/gbacktrace.c:
* glib/gdate.c:
* glib/gthread.c: const correctness fixes, found
2005-11-17 Matthias Clasen <mclasen@redhat.com>
+ * tests/Makefile.am:
+ * tests/utf8-pointer.c: Unit tests for g_utf8_pointer_to_offset
+ and g_utf8_offset_to_pointer.
+
+ * glib/gutf8.c (g_utf8_pointer_to_offset)
+ (g_utf8_offset_to_pointer): Handle negative offsets, and use
+ "stutter stepping" for going backwards. (#320638, Larry
+ Ewing)
+
* glib/gbacktrace.c:
* glib/gdate.c:
* glib/gthread.c: const correctness fixes, found
* Converts from an integer character offset to a pointer to a position
* within the string.
*
+ * Since 2.10, this function allows passing a negative @offset to
+ * step backwards.
+ *
* Return value: the resulting pointer
**/
gchar *
glong offset)
{
const gchar *s = str;
- while (offset--)
- s = g_utf8_next_char (s);
-
+
+ if (offset > 0)
+ while (offset--)
+ s = g_utf8_next_char (s);
+ else
+ {
+ const char *s1;
+
+ /* This nice technique for fast backwards stepping
+ * through a UTF-8 string was dubbed "stutter stepping"
+ * by its inventor, Larry Ewing.
+ */
+ while (offset)
+ {
+ s1 = s;
+ s += offset;
+ while ((*s & 0xc0) == 0x80)
+ s--;
+
+ offset += g_utf8_pointer_to_offset (s, s1);
+ }
+ }
+
return (gchar *)s;
}
*
* Converts from a pointer to position within a string to a integer
* character offset.
+ *
+ * Since 2.10, this function allows @pos to be before @str, and returns
+ * a negative offset in this case.
*
* Return value: the resulting character offset
**/
{
const gchar *s = str;
glong offset = 0;
-
- while (s < pos)
- {
- s = g_utf8_next_char (s);
- offset++;
- }
+ if (pos < str)
+ offset = - g_utf8_pointer_to_offset (pos, str);
+ else
+ while (s < pos)
+ {
+ s = g_utf8_next_char (s);
+ offset++;
+ }
+
return offset;
}
unicode-caseconv \
unicode-encoding \
utf8-validate \
+ utf8-pointer \
uri-test
test_scripts = run-markup-tests.sh run-collate-tests.sh
unicode_caseconv_LDADD = $(progs_ldadd)
unicode_collate_LDADD = $(progs_ldadd)
utf8_validate_LDADD = $(progs_ldadd)
+utf8_pointer_LDADD = $(progs_ldadd)
uri_test_LDADD = $(progs_ldadd)
lib_LTLIBRARIES = libmoduletestplugin_a.la libmoduletestplugin_b.la
--- /dev/null
+/* GLIB - Library of useful routines for C programming
+ * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Modified by the GLib Team and others 1997-2000. See the AUTHORS
+ * file for a list of people on the GLib Team. See the ChangeLog
+ * files for a list of changes. These files are distributed with
+ * GLib at ftp://ftp.gtk.org/pub/gtk/.
+ */
+
+#include <glib.h>
+
+/* Test conversions between offsets and pointers */
+
+/* Check that g_utf8_offset_to_pointer() and g_utf8_pointer_to_offset()
+ * agree for every ordered pair of character positions in @string, which
+ * exercises both forward (j > i) and backward (j < i) stepping.
+ *
+ * @string must be valid, nul-terminated UTF-8; this is asserted up front.
+ * An empty string has no character positions to compare and is skipped.
+ */
+static void test_utf8 (gchar *string)
+{
+  glong num_chars;
+  gchar **p;
+  glong i, j;
+
+  g_assert (g_utf8_validate (string, -1, NULL));
+
+  /* g_utf8_strlen() returns a glong; keep the counters the same width. */
+  num_chars = g_utf8_strlen (string, -1);
+
+  /* With zero characters the table below would be a zero-sized
+   * allocation, and storing p[0] into it would be an invalid write.
+   */
+  if (num_chars == 0)
+    return;
+
+  /* p[k] points at the first byte of the k-th character of @string. */
+  p = g_new (gchar *, num_chars);
+
+  p[0] = string;
+  for (i = 1; i < num_chars; i++)
+    p[i] = g_utf8_next_char (p[i-1]);
+
+  /* From every position i, reach every position j via a signed offset,
+   * and check that the inverse conversion yields the same offset.
+   */
+  for (i = 0; i < num_chars; i++)
+    for (j = 0; j < num_chars; j++)
+      {
+        g_assert (g_utf8_offset_to_pointer (p[i], j - i) == p[j]);
+        g_assert (g_utf8_pointer_to_offset (p[i], p[j]) == j - i);
+      }
+
+  g_free (p);
+}
+
+/* Test input: mostly ASCII text with runs of non-ASCII characters
+ * (ç, è, ò, ù, €) mixed in, ending in newlines.  The adjacent string
+ * literals are concatenated by the compiler with no separator between
+ * them.  NOTE(review): the multi-byte coverage relies on this source
+ * file being stored as UTF-8 -- test_utf8() asserts validity at run time.
+ */
+gchar *longline = "asdasdas dsaf asfd as fdasdf asfd asdf as dfas dfasdf a"
+"asd fasdf asdf asdf asd fasfd as fdasfd asdf as fdççççççççças ffsd asfd as fdASASASAs As"
+"Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfg sdfg sdf gsdfg sdfg sd"
+"asd fasdf asdf asdf asd fasfd as fdaèèèèèèè òòòòòòòòòòòòsfd asdf as fdas ffsd asfd as fdASASASAs D"
+"Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfgùùùùùùùùùùùùùù sdfg sdf gsdfg sdfg sd"
+"asd fasdf asdf asdf asd fasfd as fdasfd asd@@@@@@@f as fdas ffsd asfd as fdASASASAs D "
+"Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdf€€€€€€€€€€€€€€€€€€g sdfg sdfg sdf gsdfg sdfg sd"
+"asd fasdf asdf asdf asd fasfd as fdasfd asdf as fdas ffsd asfd as fdASASASAs D"
+"Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfg sdfg sdf gsdfg sdfg sd\n\nlalala\n";
+
+/* Program entry point: runs the offset/pointer round-trip checks on
+ * the built-in sample text.  Command-line arguments are not used.
+ * Returns 0 once test_utf8() has returned.
+ */
+int
+main (int argc, char *argv[])
+{
+  test_utf8 (longline);
+  return 0;
+}