Unicode: add a g_utf8_substring convenience api

author Matthias Clasen <mclasen@redhat.com>

Fri, 24 Jun 2011 01:31:40 +0000 (21:31 -0400)

committer Matthias Clasen <mclasen@redhat.com>

Fri, 24 Jun 2011 01:31:40 +0000 (21:31 -0400)
author Matthias Clasen <mclasen@redhat.com>
Fri, 24 Jun 2011 01:31:40 +0000 (21:31 -0400)
committer Matthias Clasen <mclasen@redhat.com>
Fri, 24 Jun 2011 01:31:40 +0000 (21:31 -0400)
diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt

index 7eb4309..0f750c6 100644 (file)
--- a/docs/reference/glib/glib-sections.txt
+++ b/docs/reference/glib/glib-sections.txt
@@ -2726,6 +2726,7 @@ g_utf8_strncpy
  g_utf8_strchr
  g_utf8_strrchr
  g_utf8_strreverse
+g_utf8_substring
  g_utf8_validate
  
  <SUBSECTION>
diff --git a/glib/glib.symbols b/glib/glib.symbols

index 9d34c55..abe216a 100644 (file)
--- a/glib/glib.symbols
+++ b/glib/glib.symbols
@@ -1196,13 +1196,8 @@ g_tree_search
  g_tree_steal
  g_tree_traverse
  g_unichar_break_type
-g_utf8_collate
-g_utf8_collate_key
-g_utf8_collate_key_for_filename
-g_unicode_canonical_decomposition
  g_unicode_canonical_ordering
  g_unichar_combining_class
-g_utf8_normalize
  g_unichar_isalnum
  g_unichar_isalpha
  g_unichar_iscntrl
@@ -1228,30 +1223,36 @@ g_unichar_get_script
  g_unichar_digit_value
  g_unichar_xdigit_value
  g_unichar_type
+g_unicode_canonical_decomposition
  g_utf8_casefold
-g_utf8_strup
-g_utf8_strdown
-g_get_charset
-g_ucs4_to_utf16
-g_ucs4_to_utf8
-g_utf16_to_ucs4
-g_utf16_to_utf8
+g_utf8_collate
+g_utf8_collate_key
+g_utf8_collate_key_for_filename
  g_utf8_find_next_char
  g_utf8_find_prev_char
  g_utf8_get_char
  g_utf8_get_char_validated
+g_utf8_normalize
  g_utf8_offset_to_pointer
  g_utf8_pointer_to_offset
  g_utf8_prev_char
+g_utf8_strup
+g_utf8_strdown
  g_utf8_strchr
  g_utf8_strlen
  g_utf8_strncpy
  g_utf8_strrchr
  g_utf8_strreverse
+g_utf8_substring
  g_utf8_to_ucs4
  g_utf8_to_ucs4_fast
  g_utf8_to_utf16
  g_utf8_validate
+g_get_charset
+g_ucs4_to_utf16
+g_ucs4_to_utf8
+g_utf16_to_ucs4
+g_utf16_to_utf8
  g_unichar_to_utf8
  g_unichar_validate
  glib_pgettext
diff --git a/glib/gunicode.h b/glib/gunicode.h

index d7a68c2..261b4a9 100644 (file)
--- a/glib/gunicode.h
+++ b/glib/gunicode.h
@@ -312,8 +312,12 @@ gchar*   g_utf8_find_next_char    (const gchar *p,
  gchar*   g_utf8_find_prev_char    (const gchar *str,
                                    const gchar *p) G_GNUC_PURE;
  
-glong g_utf8_strlen (const gchar *p,  
-                    gssize       max) G_GNUC_PURE;
+glong    g_utf8_strlen            (const gchar *p,
+                                   gssize       max) G_GNUC_PURE;
+
+/* Returns a newly allocated copy of the characters of str between the
+ * character offsets start_pos and end_pos; free it with g_free(). */
+gchar   *g_utf8_substring         (const gchar *str,
+                                   glong        start_pos,
+                                   glong        end_pos) G_GNUC_MALLOC;
  
  /* Copies n characters from src to dest */
  gchar* g_utf8_strncpy (gchar       *dest,
diff --git a/glib/gutf8.c b/glib/gutf8.c

index 7977f3a..51cef3c 100644 (file)
--- a/glib/gutf8.c
+++ b/glib/gutf8.c
@@ -269,6 +269,38 @@ g_utf8_strlen (const gchar *p,
  }
  
  /**
+ * g_utf8_substring:
+ * @str: a UTF-8 encoded string
+ * @start_pos: a character offset within @str
+ * @end_pos: another character offset within @str
+ *
+ * Copies a substring out of a UTF-8 encoded string.
+ * The substring will contain @end_pos - @start_pos
+ * characters.
+ *
+ * Returns: a newly allocated copy of the requested
+ *     substring. Free with g_free() when no longer needed.
+ *
+ * Since: 2.30
+ */
+gchar *
+g_utf8_substring (const gchar *str,
+                  glong        start_pos,
+                  glong        end_pos)
+{
+  gchar *start, *end, *out;
+
+  start = g_utf8_offset_to_pointer (str, start_pos);
+  end = g_utf8_offset_to_pointer (start, end_pos - start_pos);
+
+  out = g_malloc (end - start + 1);
+  memcpy (out, start, end - start);
+  out[end - start] = 0;
+
+  return out;
+}
+
+/**
   * g_utf8_get_char:
   * @p: a pointer to Unicode character encoded as UTF-8
   * 
diff --git a/glib/tests/utf8-misc.c b/glib/tests/utf8-misc.c

index dc441f2..5d23f8b 100644 (file)
--- a/glib/tests/utf8-misc.c
+++ b/glib/tests/utf8-misc.c
@@ -106,8 +106,28 @@ test_utf8_reverse (void)
    r = g_utf8_strreverse ("\340\254\213\360\220\244\200\101\341\272\266", -1);
    g_assert_cmpstr (r, ==, "\341\272\266\101\360\220\244\200\340\254\213");
    g_free (r);
+}
+
+static void
+test_utf8_substring (void) /* checks g_utf8_substring() on ASCII, empty and multi-byte ranges */
+{
+  gchar *r; /* result of each call; released with g_free() after the check */
  
+  r = g_utf8_substring ("abcd", 1, 3); /* range in the middle of an ASCII string */
+  g_assert_cmpstr (r, ==, "bc");
+  g_free (r);
  
+  r = g_utf8_substring ("abcd", 0, 4); /* range covering the whole string */
+  g_assert_cmpstr (r, ==, "abcd");
+  g_free (r);
+
+  r = g_utf8_substring ("abcd", 2, 2); /* equal offsets give an empty string */
+  g_assert_cmpstr (r, ==, "");
+  g_free (r);
+
+  r = g_utf8_substring ("abc\xe2\x82\xa0gh\xe2\x82\xa4", 2, 5); /* offsets count characters: the 3-byte sequence is one */
+  g_assert_cmpstr (r, ==, "c\xe2\x82\xa0g");
+  g_free (r);
  }
  
  static void
@@ -519,6 +539,7 @@ main (int   argc,
    g_test_add_func ("/utf8/strncpy", test_utf8_strncpy);
    g_test_add_func ("/utf8/strrchr", test_utf8_strrchr);
    g_test_add_func ("/utf8/reverse", test_utf8_reverse);
+  g_test_add_func ("/utf8/substring", test_utf8_substring);
    g_test_add_func ("/unicode/validate", test_unichar_validate);
    g_test_add_func ("/unicode/character-type", test_unichar_character_type);
    g_test_add_func ("/unicode/break-type", test_unichar_break_type);
author	Matthias Clasen <mclasen@redhat.com>
	Fri, 24 Jun 2011 01:31:40 +0000 (21:31 -0400)
committer	Matthias Clasen <mclasen@redhat.com>
	Fri, 24 Jun 2011 01:31:40 +0000 (21:31 -0400)
docs/reference/glib/glib-sections.txt		patch \| blob \| history
glib/glib.symbols		patch \| blob \| history
glib/gunicode.h		patch \| blob \| history
glib/gutf8.c		patch \| blob \| history
glib/tests/utf8-misc.c		patch \| blob \| history