From 60bf63486bc7fdf44b698a8cf078d26315727696 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Sebastian=20Dr=C3=B6ge?= Date: Fri, 10 Oct 2008 17:13:40 +0000 Subject: [PATCH] Add support for subtitle files with UTF-8 BOM at the beginning by simply stripping it from the first line before pass... Original commit message from CVS: * gst/subparse/gstsubparse.c: (gst_sub_parse_data_format_autodetect), (handle_buffer), (gst_sub_parse_change_state): * gst/subparse/gstsubparse.h: * tests/check/elements/subparse.c: (GST_START_TEST): Add support for subtitle files with UTF-8 BOM at the beginning by simply stripping it from the first line before passing it to any parsing code. Fixes bug #555257 and playback of files created by Gnome Subtitles. --- ChangeLog | 12 ++++++++++++ gst/subparse/gstsubparse.c | 21 +++++++++++++++++++-- gst/subparse/gstsubparse.h | 2 ++ tests/check/elements/subparse.c | 10 ++++++++++ 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index fc26d8f..4bd31b1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2008-10-10 Sebastian Dröge + + * gst/subparse/gstsubparse.c: + (gst_sub_parse_data_format_autodetect), (handle_buffer), + (gst_sub_parse_change_state): + * gst/subparse/gstsubparse.h: + * tests/check/elements/subparse.c: (GST_START_TEST): + Add support for subtitle files with UTF-8 BOM at the beginning + by simple stripping it from the first line before passing it + to any parsing code. Fixes bug #555257 and playback of files + created by Gnome Subtitles.
+ 2008-10-10 Wim Taymans * gst/audiotestsrc/gstaudiotestsrc.c: diff --git a/gst/subparse/gstsubparse.c b/gst/subparse/gstsubparse.c index dc2a345..5fbcfdd 100644 --- a/gst/subparse/gstsubparse.c +++ b/gst/subparse/gstsubparse.c @@ -922,6 +922,11 @@ gst_sub_parse_data_format_autodetect (gchar * match_str) } } + /* If the string contains a UTF-8 BOM drop it */ + if ((guint8) match_str[0] == 0xEF && (guint8) match_str[1] == 0xBB + && (guint8) match_str[2] == 0xBF) + match_str += 3; + if (regexec (&mdvd_rx, match_str, 0, NULL, 0) == 0) { GST_LOG ("MicroDVD (frame based) format detected"); return GST_SUB_PARSE_FORMAT_MDVDSUB; @@ -1073,11 +1078,22 @@ handle_buffer (GstSubParse * self, GstBuffer * buf) } while ((line = get_next_line (self)) && !self->flushing) { + guint offset = 0; + + /* If this is the first line and it contains a UTF-8 BOM drop it */ + if (self->first_line && strlen (line) >= 3 && + (guint8) line[0] == 0xEF && (guint8) line[1] == 0xBB + && (guint8) line[2] == 0xBF) { + offset = 3; + } + + self->first_line = FALSE; + /* Set segment on our parser state machine */ self->state.segment = &self->segment; /* Now parse the line, out of segment lines will just return NULL */ - GST_LOG_OBJECT (self, "Parsing line '%s'", line); - subtitle = self->parse_line (&self->state, line); + GST_LOG_OBJECT (self, "Parsing line '%s'", line + offset); + subtitle = self->parse_line (&self->state, line + offset); g_free (line); if (subtitle) { @@ -1252,6 +1268,7 @@ gst_sub_parse_change_state (GstElement * element, GstStateChange transition) self->next_offset = 0; self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN; self->valid_utf8 = TRUE; + self->first_line = TRUE; g_string_truncate (self->textbuf, 0); break; default: diff --git a/gst/subparse/gstsubparse.h b/gst/subparse/gstsubparse.h index 86bf301..510c3e8 100644 --- a/gst/subparse/gstsubparse.h +++ b/gst/subparse/gstsubparse.h @@ -93,6 +93,8 @@ struct _GstSubParse { gboolean flushing; gboolean valid_utf8; gchar *encoding; + + 
gboolean first_line; }; struct _GstSubParseClass { diff --git a/tests/check/elements/subparse.c b/tests/check/elements/subparse.c index 05e3619..56beab4 100644 --- a/tests/check/elements/subparse.c +++ b/tests/check/elements/subparse.c @@ -132,6 +132,13 @@ static SubParseInputChunk srt_input1[] = { 44 * GST_SECOND + 44 * GST_MSECOND, "I still cant see anything."} }; +/* has UTF-8 BOM at the start */ +static SubParseInputChunk srt_input2[] = { + { + "\xef\xbb\xbf" "1\n00:00:00,000 --> 00:00:03,50\nJust testing.\n\n", + 0, 3 * GST_SECOND + 50 * GST_MSECOND, "Just testing."} +}; + static void setup_subparse (void) { @@ -237,6 +244,9 @@ GST_START_TEST (test_srt) /* try with spaces instead of doubled zeroes (which is not exactly according to spec) */ test_srt_do_test (srt_input1, 0, G_N_ELEMENTS (srt_input1)); + + /* try with UTF-8 BOM at the start */ + test_srt_do_test (srt_input2, 0, G_N_ELEMENTS (srt_input2)); } GST_END_TEST; -- 2.7.4