From: Sebastian Dröge Date: Wed, 15 Oct 2008 11:25:09 +0000 (+0000) Subject: gst/subparse/gstsubparse.c: Improve typefinding a bit. If we don't have a Unicode... X-Git-Tag: 1.19.3~511^2~10242 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e86d1dd432d9c5b9068f3dc3bf71f01f27c61cdd;p=platform%2Fupstream%2Fgstreamer.git gst/subparse/gstsubparse.c: Improve typefinding a bit. If we don't have a Unicode charset try GST_SUBTITLE_ENCODING a... Original commit message from CVS: * gst/subparse/gstsubparse.c: (gst_subparse_type_find): Improve typefinding a bit. If we don't have a Unicode charset try GST_SUBTITLE_ENCODING and otherwise try ISO-8859-15. --- diff --git a/ChangeLog b/ChangeLog index c2817d9..24c23d0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2008-10-15 Sebastian Dröge + + * gst/subparse/gstsubparse.c: (gst_subparse_type_find): + Improve typefinding a bit. If we don't have a Unicode charset + try GST_SUBTITLE_ENCODING and otherwise try ISO-8859-15. + 2008-10-14 Edward Hervey * ext/theora/theoradec.c: (theora_dec_decode_buffer): diff --git a/gst/subparse/gstsubparse.c b/gst/subparse/gstsubparse.c index 8200f1b..bba1e08 100644 --- a/gst/subparse/gstsubparse.c +++ b/gst/subparse/gstsubparse.c @@ -1408,6 +1408,7 @@ gst_subparse_type_find (GstTypeFind * tf, gpointer private) GstCaps *caps; gchar *str; gchar *encoding = NULL; + const gchar *end; if (!(data = gst_type_find_peek (tf, 0, 129))) return; @@ -1427,12 +1428,39 @@ gst_subparse_type_find (GstTypeFind * tf, gpointer private) err->message); g_error_free (err); g_free (encoding); + } else { g_free (str); - return; + str = converted_str; + g_free (encoding); } - g_free (str); + } - str = converted_str; + /* Check if at least the first 120 chars are valid UTF8, + * otherwise convert as always */ + if (!g_utf8_validate (str, 128, &end) && (end - str) < 120) { + gchar *converted_str; + GError *err = NULL; + gsize tmp; + const gchar *enc; + + enc = g_getenv ("GST_SUBTITLE_ENCODING"); + if (enc == NULL || *enc == '\0') { + /* if local encoding is UTF-8 and no encoding specified + * via the environment variable, assume ISO-8859-15 */ + if (g_get_charset (&enc)) { + enc = "ISO-8859-15"; + } + } + converted_str = gst_convert_to_utf8 (str, 128, enc, &tmp, &err); + if (converted_str == NULL) { + GST_DEBUG ("Charset conversion failed: %s", err->message); + g_error_free (err); + g_free (str); + return; + } else { + g_free (str); + str = converted_str; + } } format = gst_sub_parse_data_format_autodetect (str);