/* GStreamer
* Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
- * Copyright (c) 2004 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
+ * Copyright (C) 2004 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
+ * Copyright (C) 2006 Tim-Philipp Müller <tim centricular net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
}
}
+/* we only allow <i>, <u> and <b>, so let's take a simple approach. This code
+ * assumes the input has been escaped and subrip_unescape_formatting() has then
+ * been run over the input! This function adds missing closing markup tags and
+ * removes broken closing tags for tags that have never been opened. */
+static void
+subrip_fix_up_markup (gchar ** p_txt)
+{
+ gchar *cur, *next_tag;
+ gchar open_tags[32];
+ guint num_open_tags = 0;
+
+ g_assert (*p_txt != NULL);
+
+ cur = *p_txt;
+ while (*cur != '\0') {
+ next_tag = strchr (cur, '<');
+ if (next_tag == NULL)
+ break;
+ ++next_tag;
+ switch (*next_tag) {
+ case '/':{
+ ++next_tag;
+ if (num_open_tags == 0 || open_tags[num_open_tags - 1] != *next_tag) {
+ GST_LOG ("broken input, closing tag '%c' is not open", *next_tag);
+ g_memmove (next_tag - 2, next_tag + 2, strlen (next_tag + 2) + 1);
+ next_tag -= 2;
+ } else {
+ /* it's all good, closing tag which is open */
+ --num_open_tags;
+ }
+ break;
+ }
+ case 'i':
+ case 'b':
+ case 'u':
+ if (num_open_tags == G_N_ELEMENTS (open_tags))
+ return; /* something dodgy is going on, stop parsing */
+ open_tags[num_open_tags] = *next_tag;
+ ++num_open_tags;
+ break;
+ default:
+ GST_ERROR ("unexpected tag '%c' (%s)", *next_tag, next_tag);
+ g_assert_not_reached ();
+ break;
+ }
+ cur = next_tag;
+ }
+
+ if (num_open_tags > 0) {
+ GString *s;
+
+ s = g_string_new (*p_txt);
+ while (num_open_tags > 0) {
+ GST_LOG ("adding missing closing tag '%c'", open_tags[num_open_tags - 1]);
+ g_string_append_c (s, '<');
+ g_string_append_c (s, '/');
+ g_string_append_c (s, open_tags[num_open_tags - 1]);
+ g_string_append_c (s, '>');
+ --num_open_tags;
+ }
+ g_free (*p_txt);
+ *p_txt = g_string_free (s, FALSE);
+ }
+}
+
static gchar *
parse_subrip (ParserState * state, const gchar * line)
{
state->state = 0;
subrip_unescape_formatting (ret);
strip_trailing_newlines (ret);
+ subrip_fix_up_markup (&ret);
return ret;
}
return NULL;
if (subtitle) {
guint subtitle_len = strlen (subtitle);
+ /* +1 for terminating NUL character */
ret = gst_pad_alloc_buffer_and_set_caps (self->srcpad,
- GST_BUFFER_OFFSET_NONE, subtitle_len,
+ GST_BUFFER_OFFSET_NONE, subtitle_len + 1,
GST_PAD_CAPS (self->srcpad), &buf);
if (ret == GST_FLOW_OK) {
- memcpy (GST_BUFFER_DATA (buf), subtitle, subtitle_len);
+ /* copy terminating NUL character as well */
+ memcpy (GST_BUFFER_DATA (buf), subtitle, subtitle_len + 1);
+ GST_BUFFER_SIZE (buf) = subtitle_len;
GST_BUFFER_TIMESTAMP (buf) = self->state.start_time;
GST_BUFFER_DURATION (buf) = self->state.duration;
return buf;
}
-static struct
+typedef struct
{
const gchar *in;
GstClockTime from_ts;
GstClockTime to_ts;
const gchar *out;
-} srt1_input[] = {
+} SubParseInputChunk;
+
+static SubParseInputChunk srt_input[] = {
{
- "1\n00:00:01,000 --> 00:00:02,000\nOne\n\n",
- 1 * GST_SECOND, 2 * GST_SECOND, "One"}, {
- "2\n00:00:02,000 --> 00:00:03,000\nTwo\n\n",
- 2 * GST_SECOND, 3 * GST_SECOND, "Two"}, {
- "3\n00:00:03,000 --> 00:00:04,000\nThree\n\n",
- 3 * GST_SECOND, 4 * GST_SECOND, "Three"}, {
- "4\n00:00:04,000 --> 00:00:05,000\nFour\n\n",
- 4 * GST_SECOND, 5 * GST_SECOND, "Four"}, {
- "5\n00:00:05,000 --> 00:00:06,000\nFive\n",
- 5 * GST_SECOND, 6 * GST_SECOND, "Five"}
+ "1\n00:00:01,000 --> 00:00:02,000\nOne\n\n",
+ 1 * GST_SECOND, 2 * GST_SECOND, "One"}, {
+ "2\n00:00:02,000 --> 00:00:03,000\nTwo\n\n",
+ 2 * GST_SECOND, 3 * GST_SECOND, "Two"}, {
+ "3\n00:00:03,000 --> 00:00:04,000\nThree\n\n",
+ 3 * GST_SECOND, 4 * GST_SECOND, "Three"}, {
+ "4\n00:00:04,000 --> 00:00:05,000\nFour\n\n",
+ 4 * GST_SECOND, 5 * GST_SECOND, "Four"}, {
+ "5\n00:00:05,000 --> 00:00:06,000\nFive\n\n",
+ 5 * GST_SECOND, 6 * GST_SECOND, "Five"}, {
+ /* markup should be preserved */
+ "6\n00:00:06,000 --> 00:00:07,000\n<i>Six</i>\n\n",
+ 6 * GST_SECOND, 7 * GST_SECOND, "<i>Six</i>"}, {
+ /* open markup tags should be closed */
+ "7\n00:00:07,000 --> 00:00:08,000\n<i>Seven\n\n",
+ 7 * GST_SECOND, 8 * GST_SECOND, "<i>Seven</i>"}, {
+ /* open markup tags should be closed (II) */
+ "8\n00:00:08,000 --> 00:00:09,000\n<b><i>Eight\n\n",
+ 8 * GST_SECOND, 9 * GST_SECOND, "<b><i>Eight</i></b>"}, {
+ /* broken markup should be fixed */
+ "9\n00:00:09,000 --> 00:00:10,000\n</b>\n\n",
+ 9 * GST_SECOND, 10 * GST_SECOND, ""}, {
+ "10\n00:00:10,000 --> 00:00:11,000\n</b></i>\n\n",
+ 10 * GST_SECOND, 11 * GST_SECOND, ""}, {
+ "11\n00:00:11,000 --> 00:00:12,000\n<i>xyz</b></i>\n\n",
+ 11 * GST_SECOND, 12 * GST_SECOND, "<i>xyz</i>"}, {
+ "12\n00:00:12,000 --> 00:00:13,000\n<i>xyz</b>\n\n",
+ 12 * GST_SECOND, 13 * GST_SECOND, "<i>xyz</i>"}, {
+ /* skip a few chunk numbers here, the numbers shouldn't matter */
+ "24\n00:01:00,000 --> 00:02:00,000\nYep, still here\n\n",
+ 60 * GST_SECOND, 120 * GST_SECOND, "Yep, still here"}, {
+ /* make sure stuff is escaped properly, but allowed markup stays intact */
+ "25\n00:03:00,000 --> 00:04:00,000\ngave <i>Rock & Roll</i> to\n\n",
+ 180 * GST_SECOND, 240 * GST_SECOND, "gave <i>Rock & Roll</i> to"}, {
+ "26\n00:04:00,000 --> 00:05:00,000\n<i>Rock & Roll</i>\n\n",
+ 240 * GST_SECOND, 300 * GST_SECOND, "<i>Rock & Roll</i>"}, {
+ "27\n00:06:00,000 --> 00:08:00,000\nRock & Roll\n\n",
+ 360 * GST_SECOND, 480 * GST_SECOND, "Rock & Roll"}
};
static void
}
static void
-test_srt_do_test (guint start_idx, guint num)
+test_srt_do_test (SubParseInputChunk * input, guint start_idx, guint num)
{
guint n;
for (n = start_idx; n < start_idx + num; ++n) {
GstBuffer *buf;
- buf = buffer_from_static_string (srt1_input[n].in);
+ buf = buffer_from_static_string (input[n].in);
fail_unless_equals_int (gst_pad_push (mysrcpad, buf), GST_FLOW_OK);
}
fail_unless (buf != NULL);
fail_unless (GST_BUFFER_TIMESTAMP_IS_VALID (buf), NULL);
fail_unless (GST_BUFFER_DURATION_IS_VALID (buf), NULL);
- fail_unless_equals_uint64 (GST_BUFFER_TIMESTAMP (buf),
- srt1_input[n].from_ts);
+ fail_unless_equals_uint64 (GST_BUFFER_TIMESTAMP (buf), input[n].from_ts);
fail_unless_equals_uint64 (GST_BUFFER_DURATION (buf),
- srt1_input[n].to_ts - srt1_input[n].from_ts);
+ input[n].to_ts - input[n].from_ts);
out = (gchar *) GST_BUFFER_DATA (buf);
out_size = GST_BUFFER_SIZE (buf);
/* shouldn't have trailing newline characters */
fail_if (out_size > 0 && out[out_size - 1] == '\n');
- fail_unless (strncmp (out, srt1_input[n].out, out_size) == 0);
+ /* shouldn't include NUL-terminator in data size */
+ fail_if (out_size > 0 && out[out_size - 1] == '\0');
+ /* but should still have a NUL-terminator behind the declared data */
+ fail_unless_equals_int (out[out_size], '\0');
+ /* make sure out string matches expected string */
+ fail_unless_equals_string (out, input[n].out);
/* check caps */
fail_unless (GST_BUFFER_CAPS (buf) != NULL);
buffer_caps_struct = gst_caps_get_structure (GST_BUFFER_CAPS (buf), 0);
- fail_unless (gst_structure_has_name (buffer_caps_struct, "text/plain")
- || gst_structure_has_name (buffer_caps_struct, "text/x-pango-markup"));
+ fail_unless_equals_string (gst_structure_get_name (buffer_caps_struct),
+ "text/x-pango-markup");
}
teardown_subparse ();
GST_START_TEST (test_srt)
{
- test_srt_do_test (0, G_N_ELEMENTS (srt1_input));
+ test_srt_do_test (srt_input, 0, G_N_ELEMENTS (srt_input));
/* make sure everything works fine if we don't start with chunk 1 */
- test_srt_do_test (1, G_N_ELEMENTS (srt1_input) - 1);
- test_srt_do_test (2, G_N_ELEMENTS (srt1_input) - 2);
- test_srt_do_test (3, G_N_ELEMENTS (srt1_input) - 3);
- test_srt_do_test (4, G_N_ELEMENTS (srt1_input) - 4);
+ test_srt_do_test (srt_input, 1, G_N_ELEMENTS (srt_input) - 1);
+ test_srt_do_test (srt_input, 2, G_N_ELEMENTS (srt_input) - 2);
+ test_srt_do_test (srt_input, 3, G_N_ELEMENTS (srt_input) - 3);
+ test_srt_do_test (srt_input, 4, G_N_ELEMENTS (srt_input) - 4);
/* try with empty input, immediate EOS */
- test_srt_do_test (5, G_N_ELEMENTS (srt1_input) - 5);
+ test_srt_do_test (srt_input, 5, G_N_ELEMENTS (srt_input) - 5);
}
GST_END_TEST;