1 /* GStreamer SSA subtitle parser
2 * Copyright (c) 2006 Tim-Philipp Müller <tim centricular net>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
20 /* Super-primitive SSA parser - we just want the text and ignore
21 * everything else like styles and timing codes etc. for now */
27 #include <stdlib.h> /* atoi() */
30 #include "gstssaparse.h"
/* Debug category for this element; GST_CAT_DEFAULT makes the GST_* logging
 * macros used in this file report to it. */
32 GST_DEBUG_CATEGORY_STATIC (ssa_parse_debug);
33 #define GST_CAT_DEFAULT ssa_parse_debug
/* Sink pad template: accepts application/x-ssa or application/x-ass caps. */
35 static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
38 GST_STATIC_CAPS ("application/x-ssa; application/x-ass")
/* Source pad template: text/x-raw with format=pango-markup. */
41 static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src",
44 GST_STATIC_CAPS ("text/x-raw, format=pango-markup")
/* Map the identifier gst_ssa_parse_parent_class onto parent_class (the name
 * used when chaining up in change_state), then define the GstSsaParse type
 * with GST_TYPE_ELEMENT as its parent type. */
47 #define gst_ssa_parse_parent_class parent_class
48 G_DEFINE_TYPE (GstSsaParse, gst_ssa_parse, GST_TYPE_ELEMENT);
/* Forward declarations of the state-change, caps, event and chain handlers
 * that are installed in init/class_init and defined further down. */
50 static GstStateChangeReturn gst_ssa_parse_change_state (GstElement *
51 element, GstStateChange transition);
52 static gboolean gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps);
53 static gboolean gst_ssa_parse_src_event (GstPad * pad, GstObject * parent,
55 static gboolean gst_ssa_parse_sink_event (GstPad * pad, GstObject * parent,
57 static GstFlowReturn gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent,
/* GObject dispose handler for GstSsaParse (installed in class_init).
 * @object: the instance being disposed. */
61 gst_ssa_parse_dispose (GObject * object)
63 GstSsaParse *parse = GST_SSA_PARSE (object);
/* Chain up so the parent class can run its own dispose. */
68 GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object));
/* Instance initialiser: creates the sink and source pads, wires up their
 * handlers and resets the parser flags.
 * @parse: the freshly allocated element instance. */
72 gst_ssa_parse_init (GstSsaParse * parse)
/* Sink pad: built from sink_templ, with the chain and event handlers of this
 * file installed before the pad is added to the element. */
74 parse->sinkpad = gst_pad_new_from_static_template (&sink_templ, "sink");
75 gst_pad_set_chain_function (parse->sinkpad,
76 GST_DEBUG_FUNCPTR (gst_ssa_parse_chain));
77 gst_pad_set_event_function (parse->sinkpad,
78 GST_DEBUG_FUNCPTR (gst_ssa_parse_sink_event));
79 gst_element_add_pad (GST_ELEMENT (parse), parse->sinkpad);
/* Source pad: built from src_templ, only an event handler is installed. */
81 parse->srcpad = gst_pad_new_from_static_template (&src_templ, "src");
82 gst_pad_set_event_function (parse->srcpad,
83 GST_DEBUG_FUNCPTR (gst_ssa_parse_src_event));
84 gst_element_add_pad (GST_ELEMENT (parse), parse->srcpad);
/* The source pad uses fixed caps and is given the template caps up front.
 * NOTE(review): the caps returned by gst_static_pad_template_get_caps() are
 * handed straight to gst_pad_set_caps() and never unreffed here, whereas
 * setcaps unrefs its caps after the same call - confirm ownership to rule
 * out a leaked reference. */
85 gst_pad_use_fixed_caps (parse->srcpad);
86 gst_pad_set_caps (parse->srcpad,
87 gst_static_pad_template_get_caps (&src_templ));
/* Until caps with codec_data arrive the stream is treated as unframed (chain
 * rejects buffers in that state) and no tag event is pending. */
90 parse->framed = FALSE;
91 parse->send_tags = FALSE;
/* Class initialiser: installs the dispose and change_state vfuncs, registers
 * both pad templates and the element metadata, and sets up the debug
 * category.
 * @klass: the class structure being initialised. */
95 gst_ssa_parse_class_init (GstSsaParseClass * klass)
97 GObjectClass *object_class = G_OBJECT_CLASS (klass);
98 GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
100 object_class->dispose = gst_ssa_parse_dispose;
/* Register the static sink and source pad templates with the element class. */
102 gst_element_class_add_pad_template (element_class,
103 gst_static_pad_template_get (&sink_templ));
104 gst_element_class_add_pad_template (element_class,
105 gst_static_pad_template_get (&src_templ));
/* Metadata strings passed in order: long name, classification, description,
 * author. */
106 gst_element_class_set_static_metadata (element_class,
107 "SSA Subtitle Parser", "Codec/Parser/Subtitle",
108 "Parses SSA subtitle streams",
109 "Tim-Philipp Müller <tim centricular net>");
/* Initialise the "ssaparse" debug category used by the logging macros. */
111 GST_DEBUG_CATEGORY_INIT (ssa_parse_debug, "ssaparse", 0,
112 "SSA subtitle parser");
114 element_class->change_state = GST_DEBUG_FUNCPTR (gst_ssa_parse_change_state);
/* Source pad event handler: no event is treated specially, each one is
 * passed to gst_pad_event_default() and its result is returned. */
118 gst_ssa_parse_src_event (GstPad * pad, GstObject * parent, GstEvent * event)
120 return gst_pad_event_default (pad, parent, event);
/* Sink pad event handler: dispatches on the event type. One branch handles
 * caps locally, the other defers to the default handler. */
124 gst_ssa_parse_sink_event (GstPad * pad, GstObject * parent, GstEvent * event)
128 switch (GST_EVENT_TYPE (event)) {
/* Caps branch (presumably the GST_EVENT_CAPS case - the label is not part of
 * the visible lines): extract the caps, configure the parser from them via
 * gst_ssa_parse_setcaps() and drop our reference to the event rather than
 * forwarding it. */
133 gst_event_parse_caps (event, &caps);
134 res = gst_ssa_parse_setcaps (pad, caps);
135 gst_event_unref (event);
/* Other branch: let the default handler deal with the event. */
139 res = gst_pad_event_default (pad, parent, event);
/* Configure the parser from the sink caps.
 *
 * Reads the "codec_data" field of the first caps structure, validates it as
 * an SSA init section, stores a copy in parse->ini and sets text/x-raw
 * pango-markup caps on the source pad.
 *
 * @sinkpad: sink pad whose parent is the GstSsaParse element
 * @caps:    caps received on the sink pad */
146 gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps)
148 GstSsaParse *parse = GST_SSA_PARSE (GST_PAD_PARENT (sinkpad));
/* Byte sequence of a UTF-8 byte-order mark, compared against the start of
 * the init section below. */
152 const guchar bom_utf8[] = { 0xEF, 0xBB, 0xBF };
159 s = gst_caps_get_structure (caps, 0);
160 val = gst_structure_get_value (s, "codec_data");
/* Unframed path (presumably taken when there is no codec_data - the test is
 * not part of the visible lines): mark the stream as unframed, which makes
 * chain reject incoming buffers. */
162 parse->framed = FALSE;
163 GST_ERROR ("Only SSA subtitles embedded in containers are supported");
/* Framed path: accept buffers and schedule a tag event for the first one. */
167 parse->framed = TRUE;
168 parse->send_tags = TRUE;
170 priv = (GstBuffer *) g_value_get_boxed (val);
171 g_return_val_if_fail (priv != NULL, FALSE);
/* Hold our own reference while the buffer is mapped; every exit path below
 * unmaps and unrefs it again.
 * NOTE(review): the result of gst_buffer_map() is not checked - confirm that
 * a failed map cannot reach the reads of map.data below. */
173 gst_buffer_ref (priv);
175 gst_buffer_map (priv, &map, GST_MAP_READ);
177 ptr = (gchar *) map.data;
/* Detect a leading UTF-8 byte-order mark (presumably skipped; the body of
 * this branch is not part of the visible lines). */
181 if (left >= 3 && memcmp (ptr, bom_utf8, 3) == 0) {
/* The init section must contain a "[Script Info]" header.
 * NOTE(review): strstr() relies on NUL termination while the other accesses
 * are bounded by left - confirm the mapped codec_data is NUL-terminated, or
 * use a length-bounded search such as g_strstr_len(). */
186 if (!strstr (ptr, "[Script Info]"))
/* ... and must be valid UTF-8 over its left bytes. */
189 if (!g_utf8_validate (ptr, left, NULL))
192 /* FIXME: parse initial section */
/* Keep a NUL-terminated private copy of the init section.
 * NOTE(review): any previous parse->ini is overwritten here without a
 * visible free - confirm repeated caps events cannot leak the old copy. */
193 parse->ini = g_strndup (ptr, left);
194 GST_LOG_OBJECT (parse, "Init section:\n%s", parse->ini);
196 gst_buffer_unmap (priv, &map);
197 gst_buffer_unref (priv);
/* Announce the output format on the source pad; our reference to the new
 * caps is dropped right after the call. */
199 outcaps = gst_caps_new_simple ("text/x-raw",
200 "format", G_TYPE_STRING, "pango-markup", NULL);
202 ret = gst_pad_set_caps (parse->srcpad, outcaps);
203 gst_caps_unref (outcaps);
/* Failure path: no "[Script Info]" header - release the buffer. */
210 GST_WARNING_OBJECT (parse, "Invalid Init section - no Script Info header");
211 gst_buffer_unmap (priv, &map);
212 gst_buffer_unref (priv);
/* Failure path: init section is not valid UTF-8 - release the buffer. */
217 GST_WARNING_OBJECT (parse, "Init section is not valid UTF-8");
218 gst_buffer_unmap (priv, &map);
219 gst_buffer_unref (priv);
/* Strip SSA style override codes ("{...}" groups) from @txt and deal with the
 * "\n", "\N" and "\h" markers, editing the string in place.
 *
 * @parse: element used as the log object
 * @txt:   NUL-terminated, writable subtitle text
 *
 * removed_any starts out FALSE; presumably it is set when a group is removed
 * and is the return value (push_line logs the cleaned text when the result is
 * true) - those statements are not part of the visible lines. */
225 gst_ssa_parse_remove_override_codes (GstSsaParse * parse, gchar * txt)
228 gboolean removed_any = FALSE;
/* Remove one "{...}" group per iteration until no '{' is left.
 * NOTE(review): the closing brace is searched from the start of txt rather
 * than from t. When a '}' precedes the '{' (e.g. "}{" or "a}b{c"), end is
 * before t, so the memmove below either moves nothing - the same '{' is found
 * again and the loop never ends - or shifts the tail to the right and writes
 * past the end of the string. Searching from t looks like the intent -
 * confirm and fix. */
230 while ((t = strchr (txt, '{'))) {
231 end = strchr (txt, '}');
/* NOTE(review): a '{' was just found, so this warning presumably reports a
 * missing closing brace and should say "Missing }" - confirm. */
233 GST_WARNING_OBJECT (parse, "Missing { for style override code");
236 /* move terminating NUL character forward as well */
237 g_memmove (t, end + 1, strlen (end + 1) + 1);
241 /* these may occur outside of curly brackets. We don't handle the different
242 * wrapping modes yet, so just remove these markers from the text for now */
243 while ((t = strstr (txt, "\\n"))) {
247 while ((t = strstr (txt, "\\N"))) {
251 while ((t = strstr (txt, "\\h"))) {
260 * gst_ssa_parse_push_line:
261 * @parse: caller element
263 * @start: timestamp for the buffer
264 * @duration: duration for the buffer
266 * Parse the text in a buffer with the given properties and
267 * push it to the srcpad of the @parse element
269 * Returns: result of the push of the created buffer
/* Turns one SSA event line into an escaped text buffer stamped with @start
 * and @duration and pushes it on the source pad.
 * @txt is the NUL-terminated line as received by chain. */
272 gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt,
273 GstClockTime start, GstClockTime duration)
281 GST_LOG_OBJECT (parse, "Parsing line #%d at %" GST_TIME_FORMAT,
282 num, GST_TIME_ARGS (start));
284 /* skip all non-text fields before the actual text */
/* Eight leading fields are stepped over; GST_FLOW_ERROR is returned from
 * inside the loop (presumably when a field separator is missing - the test
 * is not part of the visible lines). */
286 for (i = 0; i < 8; ++i) {
289 return GST_FLOW_ERROR;
293 GST_LOG_OBJECT (parse, "Text : %s", t);
/* Override codes are stripped in place; the cleaned text is logged only when
 * the helper reports that something was removed. */
295 if (gst_ssa_parse_remove_override_codes (parse, t)) {
296 GST_LOG_OBJECT (parse, "Clean: %s", t);
299 /* we claim to output pango markup, so we must escape the
300 * text even if we don't actually use any pango markup yet */
301 escaped = g_markup_printf_escaped ("%s", t);
303 len = strlen (escaped);
/* The buffer holds len + 1 bytes so the terminating NUL is copied along, but
 * its reported size is then trimmed back to len. */
305 /* allocate enough for a terminating NUL, but don't include it in buf size */
306 buf = gst_buffer_new_and_alloc (len + 1);
307 gst_buffer_fill (buf, 0, escaped, len + 1);
308 gst_buffer_set_size (buf, len);
311 GST_BUFFER_TIMESTAMP (buf) = start;
312 GST_BUFFER_DURATION (buf) = duration;
314 GST_LOG_OBJECT (parse, "Pushing buffer with timestamp %" GST_TIME_FORMAT
315 " and duration %" GST_TIME_FORMAT, GST_TIME_ARGS (start),
316 GST_TIME_ARGS (duration));
318 ret = gst_pad_push (parse->srcpad, buf);
/* A non-OK push result is only logged at debug level here; note that the
 * message prints the whole input line (txt), not just the extracted text. */
320 if (ret != GST_FLOW_OK) {
321 GST_DEBUG_OBJECT (parse, "Push of text '%s' returned flow %s", txt,
322 gst_flow_get_name (ret));
/* Sink pad chain function: receives one buffer holding a subtitle line,
 * sends the pending tag event if there is one, and hands a NUL-terminated
 * copy of the data to gst_ssa_parse_push_line().
 *
 * @sinkpad: pad the buffer arrived on
 * @parent:  the GstSsaParse element
 * @buf:     input buffer; unreffed on every visible path */
329 gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * buf)
332 GstSsaParse *parse = GST_SSA_PARSE (parent);
/* Only framed input is handled; framed is set by setcaps when the caps
 * carry codec_data. */
337 if (G_UNLIKELY (!parse->framed))
/* One-shot tag event announcing the subtitle codec; send_tags is cleared
 * afterwards so it is sent only once per setcaps. */
340 if (G_UNLIKELY (parse->send_tags)) {
343 tags = gst_tag_list_new_empty ();
344 gst_tag_list_add (tags, GST_TAG_MERGE_APPEND, GST_TAG_SUBTITLE_CODEC,
345 "SubStation Alpha", NULL);
346 gst_pad_push_event (parse->srcpad, gst_event_new_tag (tags));
347 parse->send_tags = FALSE;
350 /* make double-sure it's 0-terminated and all */
351 gst_buffer_map (buf, &map, GST_MAP_READ);
352 txt = g_strndup ((gchar *) map.data, map.size);
353 gst_buffer_unmap (buf, &map);
358 ts = GST_BUFFER_TIMESTAMP (buf);
359 ret = gst_ssa_parse_push_line (parse, txt, ts, GST_BUFFER_DURATION (buf));
/* When the line could not be pushed but the buffer has a valid timestamp, a
 * new TIME segment event is sent downstream instead. */
361 if (ret != GST_FLOW_OK && GST_CLOCK_TIME_IS_VALID (ts)) {
364 /* just advance time without sending anything */
365 gst_segment_init (&segment, GST_FORMAT_TIME);
368 gst_pad_push_event (parse->srcpad, gst_event_new_segment (&segment));
372 gst_buffer_unref (buf);
/* Failure path for unframed input: post an element error, drop the buffer
 * and report that the pad is not negotiated. */
380 GST_ELEMENT_ERROR (parse, STREAM, FORMAT, (NULL),
381 ("Only SSA subtitles embedded in containers are supported"));
382 gst_buffer_unref (buf);
383 return GST_FLOW_NOT_NEGOTIATED;
/* Path for an empty subtitle: post a warning and drop the buffer. */
387 GST_ELEMENT_WARNING (parse, STREAM, FORMAT, (NULL),
388 ("Received empty subtitle"));
389 gst_buffer_unref (buf);
394 static GstStateChangeReturn
395 gst_ssa_parse_change_state (GstElement * element, GstStateChange transition)
397 GstStateChangeReturn ret = GST_STATE_CHANGE_SUCCESS;
398 GstSsaParse *parse = GST_SSA_PARSE (element);
400 switch (transition) {
401 case GST_STATE_CHANGE_READY_TO_PAUSED:
407 ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
408 if (ret == GST_STATE_CHANGE_FAILURE)
411 switch (transition) {
412 case GST_STATE_CHANGE_PAUSED_TO_READY:
415 parse->framed = FALSE;