+2008-10-10 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+
+ * gst/subparse/gstsubparse.c:
+ (gst_sub_parse_data_format_autodetect), (handle_buffer),
+ (gst_sub_parse_change_state):
+ * gst/subparse/gstsubparse.h:
+ * tests/check/elements/subparse.c: (GST_START_TEST):
+ Add support for subtitle files with UTF-8 BOM at the beginning
+ by simply stripping it from the first line before passing it
+ to any parsing code. Fixes bug #555257 and playback of files
+ created by Gnome Subtitles.
+
2008-10-10 Wim Taymans <wim.taymans@collabora.co.uk>
* gst/audiotestsrc/gstaudiotestsrc.c:
}
}
+ /* If the string contains a UTF-8 BOM drop it */
+ if ((guint8) match_str[0] == 0xEF && (guint8) match_str[1] == 0xBB
+ && (guint8) match_str[2] == 0xBF)
+ match_str += 3;
+
if (regexec (&mdvd_rx, match_str, 0, NULL, 0) == 0) {
GST_LOG ("MicroDVD (frame based) format detected");
return GST_SUB_PARSE_FORMAT_MDVDSUB;
}
while ((line = get_next_line (self)) && !self->flushing) {
+ guint offset = 0;
+
+ /* If this is the first line and it contains a UTF-8 BOM drop it */
+ if (self->first_line && strlen (line) >= 3 &&
+ (guint8) line[0] == 0xEF && (guint8) line[1] == 0xBB
+ && (guint8) line[2] == 0xBF) {
+ offset = 3;
+ }
+
+ self->first_line = FALSE;
+
/* Set segment on our parser state machine */
self->state.segment = &self->segment;
/* Now parse the line, out of segment lines will just return NULL */
- GST_LOG_OBJECT (self, "Parsing line '%s'", line);
- subtitle = self->parse_line (&self->state, line);
+ GST_LOG_OBJECT (self, "Parsing line '%s'", line + offset);
+ subtitle = self->parse_line (&self->state, line + offset);
g_free (line);
if (subtitle) {
self->next_offset = 0;
self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
self->valid_utf8 = TRUE;
+ self->first_line = TRUE;
g_string_truncate (self->textbuf, 0);
break;
default:
44 * GST_SECOND + 44 * GST_MSECOND, "I still cant see anything."}
};
+/* SRT chunk whose data starts with the UTF-8 BOM bytes EF BB BF, placed before the first cue number */
+static SubParseInputChunk srt_input2[] = {
+ {
+ "\xef\xbb\xbf" "1\n00:00:00,000 --> 00:00:03,50\nJust testing.\n\n",
+ 0, 3 * GST_SECOND + 50 * GST_MSECOND, "Just testing."}
+};
+
static void
setup_subparse (void)
{
/* try with spaces instead of doubled zeroes (which is not exactly according to spec) */
test_srt_do_test (srt_input1, 0, G_N_ELEMENTS (srt_input1));
+
+ /* try with UTF-8 BOM at the start: feed the BOM-prefixed chunks in srt_input2 (array and element count must refer to the same table) */
+ test_srt_do_test (srt_input2, 0, G_N_ELEMENTS (srt_input2));
}
GST_END_TEST;