From: Tim-Philipp Müller Date: Thu, 12 Apr 2007 12:19:20 +0000 (+0000) Subject: API: add gst_tag_freeform_string_to_utf8() (#405072). X-Git-Tag: 1.19.3~511^2~11214 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a208469078d254d6c49fe4cc39c8476c5aa2c329;p=platform%2Fupstream%2Fgstreamer.git API: add gst_tag_freeform_string_to_utf8() (#405072). Original commit message from CVS: * docs/libs/gst-plugins-base-libs-sections.txt: * gst-libs/gst/tag/tag.h: * gst-libs/gst/tag/tags.c: (gst_tag_freeform_string_to_utf8): API: add gst_tag_freeform_string_to_utf8() (#405072). * gst-libs/gst/tag/gstid3tag.c: (gst_tag_extract_id3v1_string): Use gst_tag_freeform_string_to_utf8() here. --- diff --git a/ChangeLog b/ChangeLog index 0dee2a5..e203eb6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2007-04-12 Tim-Philipp Müller + + * docs/libs/gst-plugins-base-libs-sections.txt: + * gst-libs/gst/tag/tag.h: + * gst-libs/gst/tag/tags.c: (gst_tag_freeform_string_to_utf8): + API: add gst_tag_freeform_string_to_utf8() (#405072). + + * gst-libs/gst/tag/gstid3tag.c: (gst_tag_extract_id3v1_string): + Use gst_tag_freeform_string_to_utf8() here. + 2007-04-12 Wim Taymans * gst/gdp/gstgdppay.c: (gst_gdp_pay_chain), diff --git a/docs/libs/gst-plugins-base-libs-sections.txt b/docs/libs/gst-plugins-base-libs-sections.txt index ec065b0..747761e 100644 --- a/docs/libs/gst-plugins-base-libs-sections.txt +++ b/docs/libs/gst-plugins-base-libs-sections.txt @@ -960,6 +960,7 @@ GST_TAG_CMML_CLIP GST_TAG_CMML_HEAD GST_TAG_CMML_STREAM gst_tag_register_musicbrainz_tags +gst_tag_freeform_string_to_utf8 gst_tag_parse_extended_comment GstTagImageType diff --git a/gst-libs/gst/tag/gstid3tag.c b/gst-libs/gst/tag/gstid3tag.c index 807b7e9..fa0eb6e 100644 --- a/gst-libs/gst/tag/gstid3tag.c +++ b/gst-libs/gst/tag/gstid3tag.c @@ -320,66 +320,14 @@ static void gst_tag_extract_id3v1_string (GstTagList * list, const gchar * tag, const gchar * start, const guint size) { - const gchar *env; - gsize bytes_read; + const gchar *env_vars[] = { "GST_ID3V1_TAG_ENCODING", + "GST_ID3_TAG_ENCODING", "GST_TAG_ENCODING", NULL + }; gchar *utf8; - /* Should we try the charsets specified - * via environment variables FIRST ? */ - if (g_utf8_validate (start, size, NULL)) { - utf8 = g_strndup (start, size); - goto beach; - } - - env = g_getenv ("GST_ID3V1_TAG_ENCODING"); - if (!env || *env == '\0') - env = g_getenv ("GST_ID3_TAG_ENCODING"); - if (!env || *env == '\0') - env = g_getenv ("GST_TAG_ENCODING"); - - /* Try charsets specified via the environment */ - if (env && *env != '\0') { - gchar **c, **csets; - - csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1); - - for (c = csets; c && *c; ++c) { - if ((utf8 = - g_convert (start, size, "UTF-8", *c, &bytes_read, NULL, NULL))) { - if (bytes_read == size) { - g_strfreev (csets); - goto beach; - } - g_free (utf8); - utf8 = NULL; - } - } - } - /* Try current locale (if not UTF-8) */ - if (!g_get_charset (&env)) { - if ((utf8 = g_locale_to_utf8 (start, size, &bytes_read, NULL, NULL))) { - if (bytes_read == size) { - goto beach; - } - g_free (utf8); - utf8 = NULL; - } - } - - /* Try ISO-8859-1 */ - utf8 = - g_convert (start, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL); - if (utf8 != NULL && bytes_read == size) { - goto beach; - } - - g_free (utf8); - return; - -beach: + utf8 = gst_tag_freeform_string_to_utf8 (start, size, env_vars); - g_strchomp (utf8); - if (utf8 && utf8[0] != '\0') { + if (utf8 && *utf8 != '\0') { gst_tag_list_add (list, GST_TAG_MERGE_REPLACE, tag, utf8, NULL); } diff --git a/gst-libs/gst/tag/tag.h b/gst-libs/gst/tag/tag.h index e9f260c..5aa3842 100644 --- a/gst-libs/gst/tag/tag.h +++ b/gst-libs/gst/tag/tag.h @@ -210,8 +210,12 @@ gboolean gst_tag_parse_extended_comment (const gchar * ext_comme gchar ** value, gboolean fail_if_no_key); +gchar * gst_tag_freeform_string_to_utf8 (const gchar * data, + gint size, + const gchar ** env_vars); + /* FIXME 0.11: replace with a more general gst_tag_library_init() */ -void gst_tag_register_musicbrainz_tags (void); +void gst_tag_register_musicbrainz_tags (void); G_END_DECLS diff --git a/gst-libs/gst/tag/tags.c b/gst-libs/gst/tag/tags.c index 1e4cc47..e1966cf 100644 --- a/gst-libs/gst/tag/tags.c +++ b/gst-libs/gst/tag/tags.c @@ -222,3 +222,95 @@ gst_tag_parse_extended_comment (const gchar * ext_comment, gchar ** key, return TRUE; } + +/** + * gst_tag_freeform_string_to_utf8: + * @data: string data + * @size: length of string data, or -1 if the string is NUL-terminated + * @env_vars: a NULL-terminated string array of environment variable names, + * or NULL + * + * Convenience function to read a string with unknown character encoding. If + * the string is already in UTF-8 encoding, it will be returned right away. + * Otherwise, the environment will be searched for a number of environment + * variables (whose names are specified in the NULL-terminated string array + * @env_vars) containing a list of character encodings to try/use. If none + * are specified, the current locale will be tried. If that also doesn't work, + * ISO-8859-1 is assumed (which will almost always succeed). + * + * Returns: a newly-allocated string in UTF-8 encoding, or NULL + * + * Since: 0.10.13 + */ +gchar * +gst_tag_freeform_string_to_utf8 (const gchar * data, gint size, + const gchar ** env_vars) +{ + const gchar *env = NULL; + gsize bytes_read; + gchar *utf8 = NULL; + + g_return_val_if_fail (data != NULL, NULL); + + if (size < 0) + size = strlen (data); + + /* Should we try the charsets specified + * via environment variables FIRST ? */ + if (g_utf8_validate (data, size, NULL)) + return g_strndup (data, size); + + while ((env == NULL || *env == '\0') && env_vars && *env_vars != NULL) { + env = g_getenv (*env_vars); + ++env_vars; + } + + /* Try charsets specified via the environment */ + if (env != NULL && *env != '\0') { + gchar **c, **csets; + + csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1); + + for (c = csets; c && *c; ++c) { + if ((utf8 = g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL))) { + if (bytes_read == size) { + g_strfreev (csets); + goto beach; + } + g_free (utf8); + utf8 = NULL; + } + } + + g_strfreev (csets); + } + + /* Try current locale (if not UTF-8) */ + if (!g_get_charset (&env)) { + if ((utf8 = g_locale_to_utf8 (data, size, &bytes_read, NULL, NULL))) { + if (bytes_read == size) { + goto beach; + } + g_free (utf8); + utf8 = NULL; + } + } + + /* Try ISO-8859-1 */ + utf8 = g_convert (data, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL); + if (utf8 != NULL && bytes_read == size) { + goto beach; + } + + g_free (utf8); + return NULL; + +beach: + + g_strchomp (utf8); + if (utf8 && utf8[0] != '\0') + return utf8; + + g_free (utf8); + return NULL; +}