1 /* GStreamer TTML subtitle parser
2 * Copyright (C) <2015> British Broadcasting Corporation
4 * Chris Bass <dash@rd.bbc.co.uk>
5 * Peter Taylour <dash@rd.bbc.co.uk>
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public
18 * License along with this library; if not, write to the
19 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
24 * Parses subtitle files encoded using the EBU-TT-D profile of TTML, as defined
25 * in https://tech.ebu.ch/files/live/sites/tech/files/shared/tech/tech3380.pdf
26 * and http://www.w3.org/TR/ttaf1-dfxp/, respectively.
35 #include <libxml/xmlmemory.h>
36 #include <libxml/parser.h>
38 #include "ttmlparse.h"
40 #include "subtitlemeta.h"
42 #define DEFAULT_CELLRES_X 32
43 #define DEFAULT_CELLRES_Y 15
44 #define MAX_FONT_FAMILY_NAME_LENGTH 128
/* Number of nanoseconds in 24 hours. The expansion is parenthesised so the
 * product stays a single operand wherever the macro is used (e.g. next to
 * '/' or '%'). */
#define NSECONDS_IN_DAY (24 * 3600 * GST_SECOND)
47 #define TTML_CHAR_NULL 0x00
48 #define TTML_CHAR_SPACE 0x20
49 #define TTML_CHAR_TAB 0x09
50 #define TTML_CHAR_LF 0x0A
51 #define TTML_CHAR_CR 0x0D
53 GST_DEBUG_CATEGORY_EXTERN (ttmlparse_debug);
54 #define GST_CAT_DEFAULT ttmlparse_debug
56 static gchar *ttml_get_xml_property (const xmlNode * node, const char *name);
57 static gpointer ttml_copy_tree_element (gconstpointer src, gpointer data);
59 typedef struct _TtmlStyleSet TtmlStyleSet;
60 typedef struct _TtmlElement TtmlElement;
61 typedef struct _TtmlScene TtmlScene;
65 TTML_ELEMENT_TYPE_STYLE,
66 TTML_ELEMENT_TYPE_REGION,
67 TTML_ELEMENT_TYPE_BODY,
68 TTML_ELEMENT_TYPE_DIV,
70 TTML_ELEMENT_TYPE_SPAN,
71 TTML_ELEMENT_TYPE_ANON_SPAN,
77 TTML_WHITESPACE_MODE_NONE,
78 TTML_WHITESPACE_MODE_DEFAULT,
79 TTML_WHITESPACE_MODE_PRESERVE,
86 TtmlWhitespaceMode whitespace_mode;
91 TtmlStyleSet *style_set;
95 /* Represents a static scene consisting of one or more trees of elements that
96 * should be visible over a specific period of time. */
111 static TtmlStyleSet *
112 ttml_style_set_new (void)
114 TtmlStyleSet *ret = g_slice_new0 (TtmlStyleSet);
115 ret->table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
121 ttml_style_set_delete (TtmlStyleSet * style_set)
124 g_hash_table_unref (style_set->table);
125 g_slice_free (TtmlStyleSet, style_set);
130 /* If attribute with name @attr_name already exists in @style_set, its value
131 * will be replaced by @attr_value. */
133 ttml_style_set_add_attr (TtmlStyleSet * style_set, const gchar * attr_name,
134 const gchar * attr_value)
136 return g_hash_table_insert (style_set->table, g_strdup (attr_name),
137 g_strdup (attr_value));
142 ttml_style_set_contains_attr (TtmlStyleSet * style_set, const gchar * attr_name)
144 return g_hash_table_contains (style_set->table, attr_name);
149 ttml_style_set_get_attr (TtmlStyleSet * style_set, const gchar * attr_name)
151 return g_hash_table_lookup (style_set->table, attr_name);
156 ttml_hex_pair_to_byte (const gchar * hex_pair)
158 gint hi_digit, lo_digit;
160 hi_digit = g_ascii_xdigit_value (*hex_pair);
161 lo_digit = g_ascii_xdigit_value (*(hex_pair + 1));
162 return (hi_digit << 4) + lo_digit;
/* Color strings in EBU-TT-D can have the form "#RRGGBB" or "#RRGGBBAA". */
167 static GstSubtitleColor
168 ttml_parse_colorstring (const gchar * color)
171 const gchar *c = NULL;
172 GstSubtitleColor ret = { 0, 0, 0, 0 };
177 length = strlen (color);
178 if (((length == 7) || (length == 9)) && *color == '#') {
181 ret.r = ttml_hex_pair_to_byte (c);
182 ret.g = ttml_hex_pair_to_byte (c + 2);
183 ret.b = ttml_hex_pair_to_byte (c + 4);
188 ret.a = ttml_hex_pair_to_byte (c + 6);
190 GST_CAT_LOG (ttmlparse_debug, "Returning color - r:%u b:%u g:%u a:%u",
191 ret.r, ret.b, ret.g, ret.a);
193 GST_CAT_ERROR (ttmlparse_debug, "Invalid color string: %s", color);
201 ttml_style_set_print (TtmlStyleSet * style_set)
204 gpointer attr_name, attr_value;
207 GST_CAT_LOG (ttmlparse_debug, "\t\t[NULL]");
211 g_hash_table_iter_init (&iter, style_set->table);
212 while (g_hash_table_iter_next (&iter, &attr_name, &attr_value)) {
213 GST_CAT_LOG (ttmlparse_debug, "\t\t%s: %s", (const gchar *) attr_name,
214 (const gchar *) attr_value);
219 static TtmlStyleSet *
220 ttml_parse_style_set (const xmlNode * node)
226 value = ttml_get_xml_property (node, "id");
228 GST_CAT_ERROR (ttmlparse_debug, "styles must have an ID.");
233 s = ttml_style_set_new ();
235 for (attr = node->properties; attr != NULL; attr = attr->next) {
236 if (attr->ns && ((g_strcmp0 ((const gchar *) attr->ns->prefix, "tts") == 0)
237 || (g_strcmp0 ((const gchar *) attr->ns->prefix, "itts") == 0)
238 || (g_strcmp0 ((const gchar *) attr->ns->prefix, "ebutts") == 0))) {
239 ttml_style_set_add_attr (s, (const gchar *) attr->name,
240 (const gchar *) attr->children->content);
249 ttml_delete_element (TtmlElement * element)
251 g_free ((gpointer) element->id);
253 g_strfreev (element->styles);
254 g_free ((gpointer) element->region);
255 ttml_style_set_delete (element->style_set);
256 g_free ((gpointer) element->text);
257 g_slice_free (TtmlElement, element);
262 ttml_get_xml_property (const xmlNode * node, const char *name)
264 xmlChar *xml_string = NULL;
265 gchar *gst_string = NULL;
267 g_return_val_if_fail (strlen (name) < 128, NULL);
269 xml_string = xmlGetProp (node, (xmlChar *) name);
272 gst_string = g_strdup ((gchar *) xml_string);
273 xmlFree (xml_string);
278 /* EBU-TT-D timecodes have format hours:minutes:seconds[.fraction] */
280 ttml_parse_timecode (const gchar * timestring)
283 guint64 hours = 0, minutes = 0, seconds = 0, milliseconds = 0;
284 GstClockTime time = GST_CLOCK_TIME_NONE;
286 GST_CAT_LOG (ttmlparse_debug, "time string: %s", timestring);
288 strings = g_strsplit (timestring, ":", 3);
289 if (g_strv_length (strings) != 3U) {
290 GST_CAT_ERROR (ttmlparse_debug, "badly formatted time string: %s",
295 hours = g_ascii_strtoull (strings[0], NULL, 10U);
296 minutes = g_ascii_strtoull (strings[1], NULL, 10U);
297 if (g_strstr_len (strings[2], -1, ".")) {
299 gchar **substrings = g_strsplit (strings[2], ".", 2);
300 seconds = g_ascii_strtoull (substrings[0], NULL, 10U);
301 n_digits = strlen (substrings[1]);
302 milliseconds = g_ascii_strtoull (substrings[1], NULL, 10U);
304 (guint64) (milliseconds * pow (10.0, (3 - (double) n_digits)));
305 g_strfreev (substrings);
307 seconds = g_ascii_strtoull (strings[2], NULL, 10U);
310 if (minutes > 59 || seconds > 60) {
311 GST_CAT_ERROR (ttmlparse_debug, "invalid time string "
312 "(minutes or seconds out-of-bounds): %s\n", timestring);
315 g_strfreev (strings);
316 GST_CAT_LOG (ttmlparse_debug,
317 "hours: %" G_GUINT64_FORMAT " minutes: %" G_GUINT64_FORMAT
318 " seconds: %" G_GUINT64_FORMAT " milliseconds: %" G_GUINT64_FORMAT "",
319 hours, minutes, seconds, milliseconds);
321 time = hours * GST_SECOND * 3600
322 + minutes * GST_SECOND * 60
323 + seconds * GST_SECOND + milliseconds * GST_MSECOND;
330 ttml_parse_element (const xmlNode * node)
332 TtmlElement *element;
333 TtmlElementType type;
336 GST_CAT_DEBUG (ttmlparse_debug, "Element name: %s",
337 (const char *) node->name);
338 if ((g_strcmp0 ((const char *) node->name, "style") == 0)) {
339 type = TTML_ELEMENT_TYPE_STYLE;
340 } else if ((g_strcmp0 ((const char *) node->name, "region") == 0)) {
341 type = TTML_ELEMENT_TYPE_REGION;
342 } else if ((g_strcmp0 ((const char *) node->name, "body") == 0)) {
343 type = TTML_ELEMENT_TYPE_BODY;
344 } else if ((g_strcmp0 ((const char *) node->name, "div") == 0)) {
345 type = TTML_ELEMENT_TYPE_DIV;
346 } else if ((g_strcmp0 ((const char *) node->name, "p") == 0)) {
347 type = TTML_ELEMENT_TYPE_P;
348 } else if ((g_strcmp0 ((const char *) node->name, "span") == 0)) {
349 type = TTML_ELEMENT_TYPE_SPAN;
350 } else if ((g_strcmp0 ((const char *) node->name, "text") == 0)) {
351 type = TTML_ELEMENT_TYPE_ANON_SPAN;
352 } else if ((g_strcmp0 ((const char *) node->name, "br") == 0)) {
353 type = TTML_ELEMENT_TYPE_BR;
358 element = g_slice_new0 (TtmlElement);
359 element->type = type;
361 if ((value = ttml_get_xml_property (node, "id"))) {
362 element->id = g_strdup (value);
366 if ((value = ttml_get_xml_property (node, "style"))) {
367 element->styles = g_strsplit (value, " ", 0);
368 GST_CAT_DEBUG (ttmlparse_debug, "%u style(s) referenced in element.",
369 g_strv_length (element->styles));
373 if (element->type == TTML_ELEMENT_TYPE_STYLE
374 || element->type == TTML_ELEMENT_TYPE_REGION) {
376 ss = ttml_parse_style_set (node);
378 element->style_set = ss;
380 GST_CAT_WARNING (ttmlparse_debug,
381 "Style or Region contains no styling attributes.");
384 if ((value = ttml_get_xml_property (node, "region"))) {
385 element->region = g_strdup (value);
389 if ((value = ttml_get_xml_property (node, "begin"))) {
390 element->begin = ttml_parse_timecode (value);
393 element->begin = GST_CLOCK_TIME_NONE;
396 if ((value = ttml_get_xml_property (node, "end"))) {
397 element->end = ttml_parse_timecode (value);
400 element->end = GST_CLOCK_TIME_NONE;
404 GST_CAT_LOG (ttmlparse_debug, "Node content: %s", node->content);
405 element->text = g_strdup ((const gchar *) node->content);
408 if (element->type == TTML_ELEMENT_TYPE_BR)
409 element->text = g_strdup ("\n");
411 if ((value = ttml_get_xml_property (node, "space"))) {
412 if (g_strcmp0 (value, "preserve") == 0)
413 element->whitespace_mode = TTML_WHITESPACE_MODE_PRESERVE;
414 else if (g_strcmp0 (value, "default") == 0)
415 element->whitespace_mode = TTML_WHITESPACE_MODE_DEFAULT;
424 ttml_parse_body (const xmlNode * node)
427 TtmlElement *element;
429 GST_CAT_LOG (ttmlparse_debug, "parsing node %s", node->name);
430 element = ttml_parse_element (node);
432 ret = g_node_new (element);
436 for (node = node->children; node != NULL; node = node->next) {
437 GNode *descendants = NULL;
438 if ((descendants = ttml_parse_body (node)))
439 g_node_append (ret, descendants);
446 /* Update the fields of a GstSubtitleStyleSet, @style_set, according to the
447 * values defined in a TtmlStyleSet, @tss, and a given cell resolution. */
449 ttml_update_style_set (GstSubtitleStyleSet * style_set, TtmlStyleSet * tss,
450 guint cellres_x, guint cellres_y)
454 if ((attr = ttml_style_set_get_attr (tss, "textDirection"))) {
455 if (g_strcmp0 (attr, "rtl") == 0)
456 style_set->text_direction = GST_SUBTITLE_TEXT_DIRECTION_RTL;
458 style_set->text_direction = GST_SUBTITLE_TEXT_DIRECTION_LTR;
461 if ((attr = ttml_style_set_get_attr (tss, "fontFamily"))) {
462 if (strlen (attr) <= MAX_FONT_FAMILY_NAME_LENGTH) {
463 g_free (style_set->font_family);
464 style_set->font_family = g_strdup (attr);
466 GST_CAT_WARNING (ttmlparse_debug,
467 "Ignoring font family name as it's overly long.");
471 if ((attr = ttml_style_set_get_attr (tss, "fontSize"))) {
472 style_set->font_size = g_ascii_strtod (attr, NULL) / 100.0;
474 style_set->font_size *= (1.0 / cellres_y);
476 if ((attr = ttml_style_set_get_attr (tss, "lineHeight"))) {
477 if (g_strcmp0 (attr, "normal") == 0)
478 style_set->line_height = -1;
480 style_set->line_height = g_ascii_strtod (attr, NULL) / 100.0;
483 if ((attr = ttml_style_set_get_attr (tss, "textAlign"))) {
484 if (g_strcmp0 (attr, "left") == 0)
485 style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_LEFT;
486 else if (g_strcmp0 (attr, "center") == 0)
487 style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_CENTER;
488 else if (g_strcmp0 (attr, "right") == 0)
489 style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_RIGHT;
490 else if (g_strcmp0 (attr, "end") == 0)
491 style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_END;
493 style_set->text_align = GST_SUBTITLE_TEXT_ALIGN_START;
496 if ((attr = ttml_style_set_get_attr (tss, "color"))) {
497 style_set->color = ttml_parse_colorstring (attr);
500 if ((attr = ttml_style_set_get_attr (tss, "backgroundColor"))) {
501 style_set->background_color = ttml_parse_colorstring (attr);
504 if ((attr = ttml_style_set_get_attr (tss, "fontStyle"))) {
505 if (g_strcmp0 (attr, "italic") == 0)
506 style_set->font_style = GST_SUBTITLE_FONT_STYLE_ITALIC;
508 style_set->font_style = GST_SUBTITLE_FONT_STYLE_NORMAL;
511 if ((attr = ttml_style_set_get_attr (tss, "fontWeight"))) {
512 if (g_strcmp0 (attr, "bold") == 0)
513 style_set->font_weight = GST_SUBTITLE_FONT_WEIGHT_BOLD;
515 style_set->font_weight = GST_SUBTITLE_FONT_WEIGHT_NORMAL;
518 if ((attr = ttml_style_set_get_attr (tss, "textDecoration"))) {
519 if (g_strcmp0 (attr, "underline") == 0)
520 style_set->text_decoration = GST_SUBTITLE_TEXT_DECORATION_UNDERLINE;
522 style_set->text_decoration = GST_SUBTITLE_TEXT_DECORATION_NONE;
525 if ((attr = ttml_style_set_get_attr (tss, "unicodeBidi"))) {
526 if (g_strcmp0 (attr, "embed") == 0)
527 style_set->unicode_bidi = GST_SUBTITLE_UNICODE_BIDI_EMBED;
528 else if (g_strcmp0 (attr, "bidiOverride") == 0)
529 style_set->unicode_bidi = GST_SUBTITLE_UNICODE_BIDI_OVERRIDE;
531 style_set->unicode_bidi = GST_SUBTITLE_UNICODE_BIDI_NORMAL;
534 if ((attr = ttml_style_set_get_attr (tss, "wrapOption"))) {
535 if (g_strcmp0 (attr, "noWrap") == 0)
536 style_set->wrap_option = GST_SUBTITLE_WRAPPING_OFF;
538 style_set->wrap_option = GST_SUBTITLE_WRAPPING_ON;
541 if ((attr = ttml_style_set_get_attr (tss, "multiRowAlign"))) {
542 if (g_strcmp0 (attr, "start") == 0)
543 style_set->multi_row_align = GST_SUBTITLE_MULTI_ROW_ALIGN_START;
544 else if (g_strcmp0 (attr, "center") == 0)
545 style_set->multi_row_align = GST_SUBTITLE_MULTI_ROW_ALIGN_CENTER;
546 else if (g_strcmp0 (attr, "end") == 0)
547 style_set->multi_row_align = GST_SUBTITLE_MULTI_ROW_ALIGN_END;
549 style_set->multi_row_align = GST_SUBTITLE_MULTI_ROW_ALIGN_AUTO;
552 if ((attr = ttml_style_set_get_attr (tss, "linePadding"))) {
553 style_set->line_padding = g_ascii_strtod (attr, NULL);
554 style_set->line_padding *= (1.0 / cellres_x);
557 if ((attr = ttml_style_set_get_attr (tss, "origin"))) {
559 style_set->origin_x = g_ascii_strtod (attr, &c) / 100.0;
560 while (!g_ascii_isdigit (*c) && *c != '+' && *c != '-')
562 style_set->origin_y = g_ascii_strtod (c, NULL) / 100.0;
565 if ((attr = ttml_style_set_get_attr (tss, "extent"))) {
567 style_set->extent_w = g_ascii_strtod (attr, &c) / 100.0;
568 if ((style_set->origin_x + style_set->extent_w) > 1.0) {
569 style_set->extent_w = 1.0 - style_set->origin_x;
571 while (!g_ascii_isdigit (*c) && *c != '+' && *c != '-')
573 style_set->extent_h = g_ascii_strtod (c, NULL) / 100.0;
574 if ((style_set->origin_y + style_set->extent_h) > 1.0) {
575 style_set->extent_h = 1.0 - style_set->origin_y;
579 if ((attr = ttml_style_set_get_attr (tss, "displayAlign"))) {
580 if (g_strcmp0 (attr, "center") == 0)
581 style_set->display_align = GST_SUBTITLE_DISPLAY_ALIGN_CENTER;
582 else if (g_strcmp0 (attr, "after") == 0)
583 style_set->display_align = GST_SUBTITLE_DISPLAY_ALIGN_AFTER;
585 style_set->display_align = GST_SUBTITLE_DISPLAY_ALIGN_BEFORE;
588 if ((attr = ttml_style_set_get_attr (tss, "padding"))) {
593 decimals = g_strsplit (attr, "%", 0);
594 n_decimals = g_strv_length (decimals) - 1;
595 for (i = 0; i < n_decimals; ++i)
596 g_strstrip (decimals[i]);
598 switch (n_decimals) {
600 style_set->padding_start = style_set->padding_end =
601 style_set->padding_before = style_set->padding_after =
602 g_ascii_strtod (decimals[0], NULL) / 100.0;
606 style_set->padding_before = style_set->padding_after =
607 g_ascii_strtod (decimals[0], NULL) / 100.0;
608 style_set->padding_start = style_set->padding_end =
609 g_ascii_strtod (decimals[1], NULL) / 100.0;
613 style_set->padding_before = g_ascii_strtod (decimals[0], NULL) / 100.0;
614 style_set->padding_start = style_set->padding_end =
615 g_ascii_strtod (decimals[1], NULL) / 100.0;
616 style_set->padding_after = g_ascii_strtod (decimals[2], NULL) / 100.0;
620 style_set->padding_before = g_ascii_strtod (decimals[0], NULL) / 100.0;
621 style_set->padding_end = g_ascii_strtod (decimals[1], NULL) / 100.0;
622 style_set->padding_after = g_ascii_strtod (decimals[2], NULL) / 100.0;
623 style_set->padding_start = g_ascii_strtod (decimals[3], NULL) / 100.0;
626 g_strfreev (decimals);
628 /* Padding values in TTML files are relative to the region width & height;
629 * make them relative to the overall display width & height like all other
631 style_set->padding_before *= style_set->extent_h;
632 style_set->padding_after *= style_set->extent_h;
633 style_set->padding_end *= style_set->extent_w;
634 style_set->padding_start *= style_set->extent_w;
637 if ((attr = ttml_style_set_get_attr (tss, "writingMode"))) {
638 if (g_str_has_prefix (attr, "rl"))
639 style_set->writing_mode = GST_SUBTITLE_WRITING_MODE_RLTB;
640 else if ((g_strcmp0 (attr, "tbrl") == 0)
641 || (g_strcmp0 (attr, "tb") == 0))
642 style_set->writing_mode = GST_SUBTITLE_WRITING_MODE_TBRL;
643 else if (g_strcmp0 (attr, "tblr") == 0)
644 style_set->writing_mode = GST_SUBTITLE_WRITING_MODE_TBLR;
646 style_set->writing_mode = GST_SUBTITLE_WRITING_MODE_LRTB;
649 if ((attr = ttml_style_set_get_attr (tss, "showBackground"))) {
650 if (g_strcmp0 (attr, "whenActive") == 0)
651 style_set->show_background = GST_SUBTITLE_BACKGROUND_MODE_WHEN_ACTIVE;
653 style_set->show_background = GST_SUBTITLE_BACKGROUND_MODE_ALWAYS;
656 if ((attr = ttml_style_set_get_attr (tss, "overflow"))) {
657 if (g_strcmp0 (attr, "visible") == 0)
658 style_set->overflow = GST_SUBTITLE_OVERFLOW_MODE_VISIBLE;
660 style_set->overflow = GST_SUBTITLE_OVERFLOW_MODE_HIDDEN;
663 if ((attr = ttml_style_set_get_attr (tss, "fillLineGap"))) {
664 if (g_strcmp0 (attr, "true") == 0)
665 style_set->fill_line_gap = TRUE;
670 static TtmlStyleSet *
671 ttml_style_set_copy (TtmlStyleSet * style_set)
674 gpointer attr_name, attr_value;
675 TtmlStyleSet *ret = ttml_style_set_new ();
677 g_hash_table_iter_init (&iter, style_set->table);
678 while (g_hash_table_iter_next (&iter, &attr_name, &attr_value)) {
679 ttml_style_set_add_attr (ret, (const gchar *) attr_name,
680 (const gchar *) attr_value);
687 /* set2 overrides set1. Unlike style inheritance, merging will result in all
688 * values from set1 being merged into set2. */
689 static TtmlStyleSet *
690 ttml_style_set_merge (TtmlStyleSet * set1, TtmlStyleSet * set2)
692 TtmlStyleSet *ret = NULL;
695 ret = ttml_style_set_copy (set1);
699 gpointer attr_name, attr_value;
701 g_hash_table_iter_init (&iter, set2->table);
702 while (g_hash_table_iter_next (&iter, &attr_name, &attr_value)) {
703 ttml_style_set_add_attr (ret, (const gchar *) attr_name,
704 (const gchar *) attr_value);
708 ret = ttml_style_set_copy (set2);
716 ttml_get_relative_font_size (const gchar * parent_size,
717 const gchar * child_size)
719 guint psize = (guint) g_ascii_strtoull (parent_size, NULL, 10U);
720 guint csize = (guint) g_ascii_strtoull (child_size, NULL, 10U);
721 csize = (csize * psize) / 100U;
722 return g_strdup_printf ("%u%%", csize);
726 static TtmlStyleSet *
727 ttml_style_set_inherit (TtmlStyleSet * parent, TtmlStyleSet * child)
729 TtmlStyleSet *ret = NULL;
731 gpointer attr_name, attr_value;
734 ret = ttml_style_set_copy (child);
736 ret = ttml_style_set_new ();
742 g_hash_table_iter_init (&iter, parent->table);
743 while (g_hash_table_iter_next (&iter, &attr_name, &attr_value)) {
744 /* In TTML, if an element which has a defined fontSize is the child of an
745 * element that also has a defined fontSize, the child's font size is
746 * relative to that of its parent. If its parent doesn't have a defined
747 * fontSize, then the child's fontSize is relative to the document's cell
748 * size. Therefore, if the former is true, we calculate the value of
749 * fontSize based on the parent's fontSize; otherwise, we simply keep
750 * the value defined in the child's style set. */
751 if (g_strcmp0 ((const gchar *) attr_name, "fontSize") == 0
752 && ttml_style_set_contains_attr (ret, "fontSize")) {
753 const gchar *original_child_font_size =
754 ttml_style_set_get_attr (ret, "fontSize");
755 gchar *scaled_child_font_size =
756 ttml_get_relative_font_size ((const gchar *) attr_value,
757 original_child_font_size);
758 GST_CAT_LOG (ttmlparse_debug, "Calculated font size: %s",
759 scaled_child_font_size);
760 ttml_style_set_add_attr (ret, (const gchar *) attr_name,
761 scaled_child_font_size);
762 g_free (scaled_child_font_size);
765 /* Not all styling attributes are inherited in TTML. */
766 if (g_strcmp0 ((const gchar *) attr_name, "backgroundColor") != 0
767 && g_strcmp0 ((const gchar *) attr_name, "origin") != 0
768 && g_strcmp0 ((const gchar *) attr_name, "extent") != 0
769 && g_strcmp0 ((const gchar *) attr_name, "displayAlign") != 0
770 && g_strcmp0 ((const gchar *) attr_name, "overflow") != 0
771 && g_strcmp0 ((const gchar *) attr_name, "padding") != 0
772 && g_strcmp0 ((const gchar *) attr_name, "writingMode") != 0
773 && g_strcmp0 ((const gchar *) attr_name, "showBackground") != 0
774 && g_strcmp0 ((const gchar *) attr_name, "unicodeBidi") != 0) {
775 if (!ttml_style_set_contains_attr (ret, (const gchar *) attr_name)) {
776 ttml_style_set_add_attr (ret, (const gchar *) attr_name,
777 (const gchar *) attr_value);
787 * Returns TRUE iff @element1 and @element2 reference the same set of styles.
788 * If neither @element1 nor @element2 reference any styles, they are considered
789 * to have matching styling and, hence, TRUE is returned.
792 ttml_element_styles_match (TtmlElement * element1, TtmlElement * element2)
794 const gchar *const *strv;
797 if (!element1 || !element2 || (!element1->styles && element2->styles) ||
798 (element1->styles && !element2->styles))
801 if (!element1->styles && !element2->styles)
804 strv = (const gchar * const *) element2->styles;
806 if (g_strv_length (element1->styles) != g_strv_length (element2->styles))
809 for (i = 0; i < g_strv_length (element1->styles); ++i) {
810 if (!g_strv_contains (strv, element1->styles[i]))
819 ttml_get_element_type_string (TtmlElement * element)
821 switch (element->type) {
822 case TTML_ELEMENT_TYPE_STYLE:
823 return g_strdup ("<style>");
825 case TTML_ELEMENT_TYPE_REGION:
826 return g_strdup ("<region>");
828 case TTML_ELEMENT_TYPE_BODY:
829 return g_strdup ("<body>");
831 case TTML_ELEMENT_TYPE_DIV:
832 return g_strdup ("<div>");
834 case TTML_ELEMENT_TYPE_P:
835 return g_strdup ("<p>");
837 case TTML_ELEMENT_TYPE_SPAN:
838 return g_strdup ("<span>");
840 case TTML_ELEMENT_TYPE_ANON_SPAN:
841 return g_strdup ("<anon-span>");
843 case TTML_ELEMENT_TYPE_BR:
844 return g_strdup ("<br>");
847 return g_strdup ("Unknown");
853 /* Merge styles referenced by an element. */
855 ttml_resolve_styles (GNode * node, gpointer data)
857 TtmlStyleSet *tmp = NULL;
858 TtmlElement *element, *style;
859 GHashTable *styles_table;
863 styles_table = (GHashTable *) data;
864 element = node->data;
866 type_string = ttml_get_element_type_string (element);
867 GST_CAT_LOG (ttmlparse_debug, "Element type: %s", type_string);
868 g_free (type_string);
870 if (!element->styles)
873 for (i = 0; i < g_strv_length (element->styles); ++i) {
874 tmp = element->style_set;
875 style = g_hash_table_lookup (styles_table, element->styles[i]);
877 GST_CAT_LOG (ttmlparse_debug, "Merging style %s...", element->styles[i]);
878 element->style_set = ttml_style_set_merge (element->style_set,
880 ttml_style_set_delete (tmp);
882 GST_CAT_WARNING (ttmlparse_debug,
883 "Element references an unknown style (%s)", element->styles[i]);
887 GST_CAT_LOG (ttmlparse_debug, "Style set after merging:");
888 ttml_style_set_print (element->style_set);
895 ttml_resolve_referenced_styles (GList * trees, GHashTable * styles_table)
899 for (tree = g_list_first (trees); tree; tree = tree->next) {
900 GNode *root = (GNode *) tree->data;
901 g_node_traverse (root, G_PRE_ORDER, G_TRAVERSE_ALL, -1, ttml_resolve_styles,
907 /* Inherit styling attributes from parent. */
909 ttml_inherit_styles (GNode * node, gpointer data)
911 TtmlStyleSet *tmp = NULL;
912 TtmlElement *element, *parent;
915 element = node->data;
917 type_string = ttml_get_element_type_string (element);
918 GST_CAT_LOG (ttmlparse_debug, "Element type: %s", type_string);
919 g_free (type_string);
922 parent = node->parent->data;
923 if (parent->style_set) {
924 tmp = element->style_set;
925 if (element->type == TTML_ELEMENT_TYPE_ANON_SPAN ||
926 element->type == TTML_ELEMENT_TYPE_BR) {
927 element->style_set = ttml_style_set_merge (parent->style_set,
929 element->styles = g_strdupv (parent->styles);
931 element->style_set = ttml_style_set_inherit (parent->style_set,
934 ttml_style_set_delete (tmp);
938 GST_CAT_LOG (ttmlparse_debug, "Style set after inheriting:");
939 ttml_style_set_print (element->style_set);
946 ttml_inherit_element_styles (GList * trees)
950 for (tree = g_list_first (trees); tree; tree = tree->next) {
951 GNode *root = (GNode *) tree->data;
952 g_node_traverse (root, G_PRE_ORDER, G_TRAVERSE_ALL, -1, ttml_inherit_styles,
958 /* If whitespace_mode isn't explicitly set for this element, inherit from its
959 * parent. If this element is the root of the tree, set whitespace_mode to
960 * that of the overall document. */
962 ttml_inherit_element_whitespace_mode (GNode * node, gpointer data)
964 TtmlWhitespaceMode *doc_mode = (TtmlWhitespaceMode *) data;
965 TtmlElement *element = node->data;
968 if (element->whitespace_mode != TTML_WHITESPACE_MODE_NONE)
971 if (G_NODE_IS_ROOT (node)) {
972 element->whitespace_mode = *doc_mode;
976 parent = node->parent->data;
977 element->whitespace_mode = parent->whitespace_mode;
983 ttml_inherit_whitespace_mode (GNode * tree, TtmlWhitespaceMode doc_mode)
985 g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
986 ttml_inherit_element_whitespace_mode, &doc_mode);
991 ttml_free_node_data (GNode * node, gpointer data)
993 TtmlElement *element = node->data;
994 ttml_delete_element (element);
1000 ttml_delete_tree (GNode * tree)
1002 g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1, ttml_free_node_data,
1004 g_node_destroy (tree);
1015 ttml_clip_element_period (GNode * node, gpointer data)
1017 TtmlElement *element = node->data;
1018 ClipWindow *window = data;
1020 if (!GST_CLOCK_TIME_IS_VALID (element->begin)) {
1024 if (element->begin > window->end || element->end < window->begin) {
1025 ttml_delete_tree (node);
1030 element->begin = MAX (element->begin, window->begin);
1031 element->end = MIN (element->end, window->end);
1037 ttml_apply_time_window (GNode * tree, GstClockTime window_begin,
1038 GstClockTime window_end)
1041 window.begin = window_begin;
1042 window.end = window_end;
1044 g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
1045 ttml_clip_element_period, &window);
1050 ttml_resolve_element_timings (GNode * node, gpointer data)
1052 TtmlElement *element, *leaf;
1054 leaf = element = node->data;
1056 if (GST_CLOCK_TIME_IS_VALID (leaf->begin)
1057 && GST_CLOCK_TIME_IS_VALID (leaf->end)) {
1058 GST_CAT_LOG (ttmlparse_debug, "Leaf node already has timing.");
1062 /* Inherit timings from ancestor. */
1063 while (node->parent && !GST_CLOCK_TIME_IS_VALID (element->begin)) {
1064 node = node->parent;
1065 element = node->data;
1068 if (!GST_CLOCK_TIME_IS_VALID (element->begin)) {
1069 GST_CAT_WARNING (ttmlparse_debug,
1070 "No timing found for element; setting to Root Temporal Extent.");
1072 leaf->end = NSECONDS_IN_DAY;
1074 leaf->begin = element->begin;
1075 leaf->end = element->end;
1076 GST_CAT_LOG (ttmlparse_debug, "Leaf begin: %" GST_TIME_FORMAT,
1077 GST_TIME_ARGS (leaf->begin));
1078 GST_CAT_LOG (ttmlparse_debug, "Leaf end: %" GST_TIME_FORMAT,
1079 GST_TIME_ARGS (leaf->end));
1087 ttml_resolve_timings (GNode * tree)
1089 g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_LEAVES, -1,
1090 ttml_resolve_element_timings, NULL);
1095 ttml_resolve_leaf_region (GNode * node, gpointer data)
1097 TtmlElement *element, *leaf;
1098 leaf = element = node->data;
1100 while (node->parent && !element->region) {
1101 node = node->parent;
1102 element = node->data;
1105 if (element->region) {
1106 leaf->region = g_strdup (element->region);
1107 GST_CAT_LOG (ttmlparse_debug, "Leaf region: %s", leaf->region);
1109 GST_CAT_WARNING (ttmlparse_debug, "No region found above leaf element.");
1117 ttml_resolve_regions (GNode * tree)
1119 g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_LEAVES, -1,
1120 ttml_resolve_leaf_region, NULL);
1126 GstClockTime start_time;
1127 GstClockTime next_transition_time;
1132 ttml_update_transition_time (GNode * node, gpointer data)
1134 TtmlElement *element = node->data;
1135 TrState *state = (TrState *) data;
1137 if ((element->begin < state->next_transition_time)
1138 && (!GST_CLOCK_TIME_IS_VALID (state->start_time)
1139 || (element->begin > state->start_time))) {
1140 state->next_transition_time = element->begin;
1141 GST_CAT_LOG (ttmlparse_debug,
1142 "Updating next transition time to element begin time (%"
1143 GST_TIME_FORMAT ")", GST_TIME_ARGS (state->next_transition_time));
1147 if ((element->end < state->next_transition_time)
1148 && (element->end > state->start_time)) {
1149 state->next_transition_time = element->end;
1150 GST_CAT_LOG (ttmlparse_debug,
1151 "Updating next transition time to element end time (%"
1152 GST_TIME_FORMAT ")", GST_TIME_ARGS (state->next_transition_time));
/* Return the time of the next transition after @time, or
 * GST_CLOCK_TIME_NONE if no such transition is found. */
1161 ttml_find_next_transition (GList * trees, GstClockTime time)
1164 state.start_time = time;
1165 state.next_transition_time = GST_CLOCK_TIME_NONE;
1167 for (trees = g_list_first (trees); trees; trees = trees->next) {
1168 GNode *tree = (GNode *) trees->data;
1169 g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
1170 ttml_update_transition_time, &state);
1173 GST_CAT_LOG (ttmlparse_debug, "Next transition is at %" GST_TIME_FORMAT,
1174 GST_TIME_ARGS (state.next_transition_time));
1176 return state.next_transition_time;
1180 /* Remove nodes from tree that are not visible at @time. */
1182 ttml_remove_nodes_by_time (GNode * node, GstClockTime time)
1184 GNode *child, *next_child;
1185 TtmlElement *element;
1186 element = node->data;
1188 child = node->children;
1189 next_child = child ? child->next : NULL;
1191 ttml_remove_nodes_by_time (child, time);
1193 next_child = child ? child->next : NULL;
1196 if (!node->children && ((element->begin > time) || (element->end <= time))) {
1197 ttml_delete_tree (node);
1205 /* Return a list of trees containing the elements and their ancestors that are
1206 * visible at @time. */
1208 ttml_get_active_trees (GList * element_trees, GstClockTime time)
1213 for (tree = g_list_first (element_trees); tree; tree = tree->next) {
1214 GNode *root = g_node_copy_deep ((GNode *) tree->data,
1215 ttml_copy_tree_element, NULL);
1216 GST_CAT_LOG (ttmlparse_debug, "There are %u nodes in tree.",
1217 g_node_n_nodes (root, G_TRAVERSE_ALL));
1218 root = ttml_remove_nodes_by_time (root, time);
1220 GST_CAT_LOG (ttmlparse_debug,
1221 "After filtering there are %u nodes in tree.", g_node_n_nodes (root,
1224 ret = g_list_append (ret, root);
1226 GST_CAT_LOG (ttmlparse_debug,
1227 "All elements have been filtered from tree.");
1231 GST_CAT_DEBUG (ttmlparse_debug, "There are %u trees in returned list.",
1232 g_list_length (ret));
1238 ttml_create_scenes (GList * region_trees)
1240 TtmlScene *cur_scene = NULL;
1241 GList *output_scenes = NULL;
1242 GList *active_trees = NULL;
1243 GstClockTime timestamp = GST_CLOCK_TIME_NONE;
1245 while ((timestamp = ttml_find_next_transition (region_trees, timestamp))
1246 != GST_CLOCK_TIME_NONE) {
1247 GST_CAT_LOG (ttmlparse_debug,
1248 "Next transition found at time %" GST_TIME_FORMAT,
1249 GST_TIME_ARGS (timestamp));
1251 cur_scene->end = timestamp;
1252 output_scenes = g_list_append (output_scenes, cur_scene);
1255 active_trees = ttml_get_active_trees (region_trees, timestamp);
1256 GST_CAT_LOG (ttmlparse_debug, "There will be %u active regions after "
1257 "transition", g_list_length (active_trees));
1260 cur_scene = g_slice_new0 (TtmlScene);
1261 cur_scene->begin = timestamp;
1262 cur_scene->trees = active_trees;
1268 return output_scenes;
1272 /* Handle element whitespace in accordance with section 7.2.3 of the TTML
1273 * specification. Specifically, this function implements the
1274 * white-space-collapse="true" and linefeed-treatment="treat-as-space"
1275 * behaviours. Note that stripping of whitespace at the start and end of line
1276 * areas (suppress-at-line-break="auto" and
1277 * white-space-treatment="ignore-if-surrounding-linefeed" behaviours) can only
1278 * be done by the renderer once the text from multiple elements has been laid
1279 * out in line areas. */
1281 ttml_handle_element_whitespace (GNode * node, gpointer data)
1283 TtmlElement *element = node->data;
1284 guint space_count = 0;
1288 if (!element->text || (element->type == TTML_ELEMENT_TYPE_BR) ||
1289 (element->whitespace_mode == TTML_WHITESPACE_MODE_PRESERVE)) {
1293 textlen = strlen (element->text);
1294 for (c = element->text; TRUE; c = g_utf8_next_char (c)) {
1296 gchar buf[6] = { 0 };
1297 gunichar u = g_utf8_get_char (c);
1298 gint nbytes = g_unichar_to_utf8 (u, buf);
1300 /* Replace each newline or tab with a space. */
1301 if (nbytes == 1 && (buf[0] == TTML_CHAR_LF || buf[0] == TTML_CHAR_TAB)) {
1303 buf[0] = TTML_CHAR_SPACE;
1306 /* Collapse runs of whitespace. */
1307 if (nbytes == 1 && (buf[0] == TTML_CHAR_SPACE || buf[0] == TTML_CHAR_CR)) {
1310 if (space_count > 1) {
1311 gchar *new_head = c - space_count + 1;
1312 g_strlcpy (new_head, c, textlen);
1316 if (nbytes == 1 && buf[0] == TTML_CHAR_NULL)
1326 ttml_handle_whitespace (GNode * tree)
1328 g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_LEAVES, -1,
1329 ttml_handle_element_whitespace, NULL);
1334 ttml_filter_content_nodes (GNode * node)
1336 GNode *child, *next_child;
1337 TtmlElement *element = node->data;
1338 TtmlElement *parent = node->parent ? node->parent->data : NULL;
1340 child = node->children;
1341 next_child = child ? child->next : NULL;
1343 ttml_filter_content_nodes (child);
1345 next_child = child ? child->next : NULL;
1348 /* Only text content in <p>s and <span>s is significant. */
1349 if (element->type == TTML_ELEMENT_TYPE_ANON_SPAN
1350 && parent->type != TTML_ELEMENT_TYPE_P
1351 && parent->type != TTML_ELEMENT_TYPE_SPAN) {
1352 ttml_delete_element (element);
1353 g_node_destroy (node);
1361 /* Store in @table child elements of @node with name @element_name. A child
1362 * element with the same ID as an existing entry in @table will overwrite the
1363 * existing entry. */
1365 ttml_store_unique_children (xmlNodePtr node, const gchar * element_name,
1370 for (ptr = node->children; ptr; ptr = ptr->next) {
1371 if (xmlStrcmp (ptr->name, (const xmlChar *) element_name) == 0) {
1372 TtmlElement *element = ttml_parse_element (ptr);
1376 new_key = g_hash_table_insert (table, g_strdup (element->id), element);
1378 GST_CAT_WARNING (ttmlparse_debug,
1379 "Document contains two %s elements with the same ID (\"%s\").",
1380 element_name, element->id);
1387 /* Parse style and region elements from @head and store in their respective
1388 * hash tables for future reference. */
1390 ttml_parse_head (xmlNodePtr head, GHashTable * styles_table,
1391 GHashTable * regions_table)
1395 for (node = head->children; node; node = node->next) {
1396 if (xmlStrcmp (node->name, (const xmlChar *) "styling") == 0)
1397 ttml_store_unique_children (node, "style", styles_table);
1398 if (xmlStrcmp (node->name, (const xmlChar *) "layout") == 0)
1399 ttml_store_unique_children (node, "region", regions_table);
1404 /* Remove nodes that do not belong to @region, or are not an ancestor of a node
1405 * belonging to @region. */
1407 ttml_remove_nodes_by_region (GNode * node, const gchar * region)
1409 GNode *child, *next_child;
1410 TtmlElement *element;
1411 element = node->data;
1413 child = node->children;
1414 next_child = child ? child->next : NULL;
1416 ttml_remove_nodes_by_region (child, region);
1418 next_child = child ? child->next : NULL;
1421 if ((element->type == TTML_ELEMENT_TYPE_ANON_SPAN
1422 || element->type != TTML_ELEMENT_TYPE_BR)
1423 && element->region && (g_strcmp0 (element->region, region) != 0)) {
1424 ttml_delete_element (element);
1425 g_node_destroy (node);
1428 if (element->type != TTML_ELEMENT_TYPE_ANON_SPAN
1429 && element->type != TTML_ELEMENT_TYPE_BR && !node->children) {
1430 ttml_delete_element (element);
1431 g_node_destroy (node);
1439 static TtmlElement *
1440 ttml_copy_element (const TtmlElement * element)
1442 TtmlElement *ret = g_slice_new0 (TtmlElement);
1444 ret->type = element->type;
1446 ret->id = g_strdup (element->id);
1447 ret->whitespace_mode = element->whitespace_mode;
1448 if (element->styles)
1449 ret->styles = g_strdupv (element->styles);
1450 if (element->region)
1451 ret->region = g_strdup (element->region);
1452 ret->begin = element->begin;
1453 ret->end = element->end;
1454 if (element->style_set)
1455 ret->style_set = ttml_style_set_copy (element->style_set);
1457 ret->text = g_strdup (element->text);
1464 ttml_copy_tree_element (gconstpointer src, gpointer data)
1466 return ttml_copy_element ((TtmlElement *) src);
1470 /* Split the body tree into a set of trees, each containing only the elements
1471 * belonging to a single region. Returns a list of trees, one per region, each
1472 * with the corresponding region element at its root. */
1474 ttml_split_body_by_region (GNode * body, GHashTable * regions)
1476 GHashTableIter iter;
1477 gpointer key, value;
1480 g_hash_table_iter_init (&iter, regions);
1481 while (g_hash_table_iter_next (&iter, &key, &value)) {
1482 gchar *region_name = (gchar *) key;
1483 TtmlElement *region = (TtmlElement *) value;
1484 GNode *region_node = g_node_new (ttml_copy_element (region));
1485 GNode *body_copy = g_node_copy_deep (body, ttml_copy_tree_element, NULL);
1487 GST_CAT_DEBUG (ttmlparse_debug, "Creating tree for region %s", region_name);
1488 GST_CAT_LOG (ttmlparse_debug, "Copy of body has %u nodes.",
1489 g_node_n_nodes (body_copy, G_TRAVERSE_ALL));
1491 body_copy = ttml_remove_nodes_by_region (body_copy, region_name);
1493 GST_CAT_LOG (ttmlparse_debug, "Copy of body now has %u nodes.",
1494 g_node_n_nodes (body_copy, G_TRAVERSE_ALL));
1496 /* Reparent tree to region node. */
1497 g_node_prepend (region_node, body_copy);
1499 GST_CAT_LOG (ttmlparse_debug, "Final tree has %u nodes.",
1500 g_node_n_nodes (region_node, G_TRAVERSE_ALL));
1501 ret = g_list_append (ret, region_node);
1504 GST_CAT_DEBUG (ttmlparse_debug, "Returning %u trees.", g_list_length (ret));
1510 ttml_add_text_to_buffer (GstBuffer * buf, const gchar * text)
1516 if (gst_buffer_n_memory (buf) == gst_buffer_get_max_memory ())
1519 mem = gst_allocator_alloc (NULL, strlen (text) + 1, NULL);
1520 if (!gst_memory_map (mem, &map, GST_MAP_WRITE))
1521 GST_CAT_ERROR (ttmlparse_debug, "Failed to map memory.");
1523 g_strlcpy ((gchar *) map.data, text, map.size);
1524 GST_CAT_DEBUG (ttmlparse_debug, "Inserted following text into buffer: \"%s\"",
1525 (gchar *) map.data);
1526 gst_memory_unmap (mem, &map);
1528 ret = gst_buffer_n_memory (buf);
1529 gst_buffer_insert_memory (buf, -1, mem);
1534 /* Create a GstSubtitleElement from @element, add it to @block, and insert its
1535 * associated text in @buf. */
1537 ttml_add_element (GstSubtitleBlock * block, TtmlElement * element,
1538 GstBuffer * buf, guint cellres_x, guint cellres_y)
1540 GstSubtitleStyleSet *element_style = NULL;
1542 GstSubtitleElement *sub_element = NULL;
1544 buffer_index = ttml_add_text_to_buffer (buf, element->text);
1545 if (buffer_index == -1) {
1546 GST_CAT_WARNING (ttmlparse_debug,
1547 "Reached maximum element count for buffer - discarding element.");
1551 GST_CAT_DEBUG (ttmlparse_debug, "Inserted text at index %u in GstBuffer.",
1554 element_style = gst_subtitle_style_set_new ();
1555 ttml_update_style_set (element_style, element->style_set,
1556 cellres_x, cellres_y);
1557 sub_element = gst_subtitle_element_new (element_style, buffer_index,
1558 (element->whitespace_mode != TTML_WHITESPACE_MODE_PRESERVE));
1560 gst_subtitle_block_add_element (block, sub_element);
1561 GST_CAT_DEBUG (ttmlparse_debug,
1562 "Added element to block; there are now %u elements in the block.",
1563 gst_subtitle_block_get_element_count (block));
1568 /* Return TRUE if @color is totally transparent. */
1570 ttml_color_is_transparent (const GstSubtitleColor * color)
1575 return (color->a == 0);
1579 /* Blend @color2 over @color1 and return the resulting color. This is currently
1580 * a dummy implementation that simply returns color2 as long as it's
1581 * not fully transparent. */
1582 /* TODO: Implement actual blending of colors. */
1583 static GstSubtitleColor
1584 ttml_blend_colors (GstSubtitleColor color1, GstSubtitleColor color2)
1586 if (ttml_color_is_transparent (&color2))
1594 ttml_warn_of_mispositioned_element (TtmlElement * element)
1596 gchar *type = ttml_get_element_type_string (element);
1597 GST_CAT_WARNING (ttmlparse_debug, "Ignoring illegally positioned %s element.",
1603 /* Create the subtitle region and its child blocks and elements for @tree,
1604 * inserting element text in @buf. Ownership of created region is transferred
1606 static GstSubtitleRegion *
1607 ttml_create_subtitle_region (GNode * tree, GstBuffer * buf, guint cellres_x,
1610 GstSubtitleRegion *region = NULL;
1611 GstSubtitleStyleSet *region_style;
1612 GstSubtitleColor block_color;
1613 TtmlElement *element;
1616 element = tree->data; /* Region element */
1617 region_style = gst_subtitle_style_set_new ();
1618 ttml_update_style_set (region_style, element->style_set, cellres_x,
1620 region = gst_subtitle_region_new (region_style);
1622 node = tree->children;
1626 element = node->data; /* Body element */
1628 ttml_parse_colorstring (ttml_style_set_get_attr (element->style_set,
1629 "backgroundColor"));
1631 for (node = node->children; node; node = node->next) {
1633 GstSubtitleColor div_color;
1635 element = node->data;
1636 if (element->type != TTML_ELEMENT_TYPE_DIV) {
1637 ttml_warn_of_mispositioned_element (element);
1641 ttml_parse_colorstring (ttml_style_set_get_attr (element->style_set,
1642 "backgroundColor"));
1643 block_color = ttml_blend_colors (block_color, div_color);
1645 for (p_node = node->children; p_node; p_node = p_node->next) {
1646 GstSubtitleBlock *block = NULL;
1647 GstSubtitleStyleSet *block_style;
1648 GNode *content_node;
1649 GstSubtitleColor p_color;
1651 element = p_node->data;
1652 if (element->type != TTML_ELEMENT_TYPE_P) {
1653 ttml_warn_of_mispositioned_element (element);
1657 ttml_parse_colorstring (ttml_style_set_get_attr (element->style_set,
1658 "backgroundColor"));
1659 block_color = ttml_blend_colors (block_color, p_color);
1661 block_style = gst_subtitle_style_set_new ();
1662 ttml_update_style_set (block_style, element->style_set, cellres_x,
1664 block_style->background_color = block_color;
1665 block = gst_subtitle_block_new (block_style);
1667 for (content_node = p_node->children; content_node;
1668 content_node = content_node->next) {
1670 element = content_node->data;
1672 if (element->type == TTML_ELEMENT_TYPE_BR
1673 || element->type == TTML_ELEMENT_TYPE_ANON_SPAN) {
1674 if (!ttml_add_element (block, element, buf, cellres_x, cellres_y))
1675 GST_CAT_WARNING (ttmlparse_debug,
1676 "Failed to add element to buffer.");
1677 } else if (element->type == TTML_ELEMENT_TYPE_SPAN) {
1678 /* Loop through anon-span children of this span. */
1679 for (anon_node = content_node->children; anon_node;
1680 anon_node = anon_node->next) {
1681 element = anon_node->data;
1683 if (element->type == TTML_ELEMENT_TYPE_BR
1684 || element->type == TTML_ELEMENT_TYPE_ANON_SPAN) {
1685 if (!ttml_add_element (block, element, buf, cellres_x, cellres_y))
1686 GST_CAT_WARNING (ttmlparse_debug,
1687 "Failed to add element to buffer.");
1689 ttml_warn_of_mispositioned_element (element);
1693 ttml_warn_of_mispositioned_element (element);
1697 if (gst_subtitle_block_get_element_count (block) > 0) {
1698 gst_subtitle_region_add_block (region, block);
1699 GST_CAT_DEBUG (ttmlparse_debug,
1700 "Added block to region; there are now %u blocks in the region.",
1701 gst_subtitle_region_get_block_count (region));
1703 gst_subtitle_block_unref (block);
1712 /* For each scene, create data objects to describe the layout and styling of
1713 * that scene and attach it as metadata to the GstBuffer that will be used to
1714 * carry that scene's text. */
1716 ttml_attach_scene_metadata (GList * scenes, guint cellres_x, guint cellres_y)
1720 for (scene_entry = g_list_first (scenes); scene_entry;
1721 scene_entry = scene_entry->next) {
1722 TtmlScene *scene = scene_entry->data;
1724 GPtrArray *regions = g_ptr_array_new_with_free_func (
1725 (GDestroyNotify) gst_subtitle_region_unref);
1727 scene->buf = gst_buffer_new ();
1728 GST_BUFFER_PTS (scene->buf) = scene->begin;
1729 GST_BUFFER_DURATION (scene->buf) = (scene->end - scene->begin);
1731 for (region_tree = g_list_first (scene->trees); region_tree;
1732 region_tree = region_tree->next) {
1733 GNode *tree = (GNode *) region_tree->data;
1734 GstSubtitleRegion *region;
1736 region = ttml_create_subtitle_region (tree, scene->buf, cellres_x,
1739 g_ptr_array_add (regions, region);
1742 gst_buffer_add_subtitle_meta (scene->buf, regions);
1748 create_buffer_list (GList * scenes)
1753 TtmlScene *scene = scenes->data;
1754 ret = g_list_prepend (ret, gst_buffer_ref (scene->buf));
1755 scenes = scenes->next;
1757 return g_list_reverse (ret);
1762 ttml_delete_scene (TtmlScene * scene)
1765 g_list_free_full (scene->trees, (GDestroyNotify) ttml_delete_tree);
1767 gst_buffer_unref (scene->buf);
1768 g_slice_free (TtmlScene, scene);
1773 ttml_assign_region_times (GList * region_trees, GstClockTime doc_begin,
1774 GstClockTime doc_duration)
1778 for (tree = g_list_first (region_trees); tree; tree = tree->next) {
1779 GNode *region_node = (GNode *) tree->data;
1780 TtmlElement *region = (TtmlElement *) region_node->data;
1781 const gchar *show_background_value =
1782 ttml_style_set_get_attr (region->style_set, "showBackground");
1783 gboolean always_visible =
1784 (g_strcmp0 (show_background_value, "whenActive") != 0);
1786 GstSubtitleColor region_color = { 0, 0, 0, 0 };
1787 if (ttml_style_set_contains_attr (region->style_set, "backgroundColor"))
1789 ttml_parse_colorstring (ttml_style_set_get_attr (region->style_set,
1790 "backgroundColor"));
1792 if (always_visible && !ttml_color_is_transparent (®ion_color)) {
1793 GST_CAT_DEBUG (ttmlparse_debug, "Assigning times to region.");
1794 /* If the input XML document was not encapsulated in a container that
1795 * provides timing information for the document as a whole (i.e., its
1796 * PTS and duration) and the region background should be always visible,
1797 * set region start time to 0 and end time to 24 hours. This ensures that
1798 * regions with showBackground="always" are visible for the entirety of
1799 * any real-world stream. */
1800 region->begin = (doc_begin != GST_CLOCK_TIME_NONE) ? doc_begin : 0;
1801 region->end = (doc_duration != GST_CLOCK_TIME_NONE) ?
1802 region->begin + doc_duration : NSECONDS_IN_DAY;
1809 * Promotes @node to the position of its parent, setting the prev, next and
1810 * parent pointers of @node to that of its original parent. The replaced parent
1811 * is freed. Should be called only on nodes that are the sole child of their
1812 * parent, otherwise sibling nodes may be leaked.
1815 ttml_promote_node (GNode * node)
1817 GNode *parent_node = node->parent;
1818 TtmlElement *parent_element;
1822 parent_element = (TtmlElement *) parent_node->data;
1824 node->prev = parent_node->prev;
1826 parent_node->parent->children = node;
1828 node->prev->next = node;
1829 node->next = parent_node->next;
1831 node->next->prev = node;
1832 node->parent = parent_node->parent;
1834 parent_node->prev = parent_node->next = NULL;
1835 parent_node->parent = parent_node->children = NULL;
1836 g_node_destroy (parent_node);
1837 ttml_delete_element (parent_element);
1842 * Returns TRUE if @element is of a type that can be joined with another
1846 ttml_element_is_joinable (TtmlElement * element)
1848 return element->type == TTML_ELEMENT_TYPE_ANON_SPAN ||
1849 element->type == TTML_ELEMENT_TYPE_BR;
1853 /* Joins adjacent inline element in @tree that have the same styling. */
1855 ttml_join_region_tree_inline_elements (GNode * tree)
1859 for (n1 = tree; n1; n1 = n1->next) {
1861 TtmlElement *element = (TtmlElement *) n1->data;
1862 ttml_join_region_tree_inline_elements (n1->children);
1863 if (element->type == TTML_ELEMENT_TYPE_SPAN &&
1864 g_node_n_children (n1) == 1) {
1865 GNode *child = n1->children;
1868 ttml_promote_node (child);
1878 TtmlElement *e1 = (TtmlElement *) n1->data;
1879 TtmlElement *e2 = (TtmlElement *) n2->data;
1881 if (ttml_element_is_joinable (e1) &&
1882 ttml_element_is_joinable (e2) && ttml_element_styles_match (e1, e2)) {
1883 gchar *tmp = e1->text;
1884 GST_CAT_LOG (ttmlparse_debug,
1885 "Joining adjacent element text \"%s\" & \"%s\"", e1->text, e2->text);
1886 e1->text = g_strconcat (e1->text, e2->text, NULL);
1887 e1->type = TTML_ELEMENT_TYPE_ANON_SPAN;
1890 ttml_delete_element (e2);
1891 g_node_destroy (n2);
1902 ttml_join_inline_elements (GList * scenes)
1906 for (scene_entry = g_list_first (scenes); scene_entry;
1907 scene_entry = scene_entry->next) {
1908 TtmlScene *scene = scene_entry->data;
1911 for (region_tree = g_list_first (scene->trees); region_tree;
1912 region_tree = region_tree->next) {
1913 GNode *tree = (GNode *) region_tree->data;
1914 ttml_join_region_tree_inline_elements (tree);
1921 ttml_find_child (xmlNodePtr parent, const gchar * name)
1923 xmlNodePtr child = parent->children;
1924 while (child && xmlStrcmp (child->name, (const xmlChar *) name) != 0)
1925 child = child->next;
1929 #define XML_START_TAG "<?xml"
1930 #define TTML_END_TAG "</tt>"
1933 ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration,
1937 xmlNodePtr root_node, head_node, body_node;
1939 GHashTable *styles_table, *regions_table;
1940 GList *output_buffers = NULL;
1942 guint cellres_x, cellres_y;
1943 TtmlWhitespaceMode doc_whitespace_mode = TTML_WHITESPACE_MODE_DEFAULT;
1945 guint start_offset = 0;
1946 gchar *start_xml, *end_tt;
1948 g_return_val_if_fail (parsed != NULL, 0);
1951 if (!g_utf8_validate (input, -1, NULL)) {
1952 GST_CAT_ERROR (ttmlparse_debug, "Input isn't valid UTF-8.");
1955 GST_CAT_LOG (ttmlparse_debug, "Input:\n%s", input);
1957 start_xml = g_strstr_len (input, strlen (input), XML_START_TAG);
1958 end_tt = g_strstr_len (input, strlen (input), TTML_END_TAG);
1960 if (!start_xml || !end_tt) {
1961 GST_CAT_DEBUG (ttmlparse_debug, "Need more data");
1965 consumed = end_tt - input + strlen (TTML_END_TAG);
1966 start_offset = start_xml - input;
1968 styles_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
1969 (GDestroyNotify) ttml_delete_element);
1970 regions_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
1971 (GDestroyNotify) ttml_delete_element);
1974 doc = xmlReadMemory (start_xml, consumed - start_offset, "any_doc_name",
1977 GST_CAT_ERROR (ttmlparse_debug, "Failed to parse document.");
1981 root_node = xmlDocGetRootElement (doc);
1983 if (xmlStrcmp (root_node->name, (const xmlChar *) "tt") != 0) {
1984 GST_CAT_ERROR (ttmlparse_debug, "Root element of document is not tt:tt.");
1989 if ((value = ttml_get_xml_property (root_node, "cellResolution"))) {
1991 cellres_x = (guint) g_ascii_strtoull (ptr, &ptr, 10U);
1992 cellres_y = (guint) g_ascii_strtoull (ptr, NULL, 10U);
1995 cellres_x = DEFAULT_CELLRES_X;
1996 cellres_y = DEFAULT_CELLRES_Y;
1999 GST_CAT_DEBUG (ttmlparse_debug, "cellres_x: %u cellres_y: %u", cellres_x,
2002 if ((value = ttml_get_xml_property (root_node, "space"))) {
2003 if (g_strcmp0 (value, "preserve") == 0) {
2004 GST_CAT_DEBUG (ttmlparse_debug, "Preserving whitespace...");
2005 doc_whitespace_mode = TTML_WHITESPACE_MODE_PRESERVE;
2010 if (!(head_node = ttml_find_child (root_node, "head"))) {
2011 GST_CAT_ERROR (ttmlparse_debug, "No <head> element found.");
2016 ttml_parse_head (head_node, styles_table, regions_table);
2018 if ((body_node = ttml_find_child (root_node, "body"))) {
2020 GList *region_trees = NULL;
2021 GList *scenes = NULL;
2023 body_tree = ttml_parse_body (body_node);
2024 GST_CAT_LOG (ttmlparse_debug, "body_tree tree contains %u nodes.",
2025 g_node_n_nodes (body_tree, G_TRAVERSE_ALL));
2026 GST_CAT_LOG (ttmlparse_debug, "body_tree tree height is %u",
2027 g_node_max_height (body_tree));
2029 ttml_inherit_whitespace_mode (body_tree, doc_whitespace_mode);
2030 ttml_handle_whitespace (body_tree);
2031 ttml_filter_content_nodes (body_tree);
2032 if (GST_CLOCK_TIME_IS_VALID (begin) && GST_CLOCK_TIME_IS_VALID (duration))
2033 ttml_apply_time_window (body_tree, begin, begin + duration);
2034 ttml_resolve_timings (body_tree);
2035 ttml_resolve_regions (body_tree);
2036 region_trees = ttml_split_body_by_region (body_tree, regions_table);
2037 ttml_resolve_referenced_styles (region_trees, styles_table);
2038 ttml_inherit_element_styles (region_trees);
2039 ttml_assign_region_times (region_trees, begin, duration);
2040 scenes = ttml_create_scenes (region_trees);
2041 GST_CAT_LOG (ttmlparse_debug, "There are %u scenes in all.",
2042 g_list_length (scenes));
2043 ttml_join_inline_elements (scenes);
2044 ttml_attach_scene_metadata (scenes, cellres_x, cellres_y);
2045 output_buffers = create_buffer_list (scenes);
2047 g_list_free_full (scenes, (GDestroyNotify) ttml_delete_scene);
2048 g_list_free_full (region_trees, (GDestroyNotify) ttml_delete_tree);
2049 ttml_delete_tree (body_tree);
2053 g_hash_table_destroy (styles_table);
2054 g_hash_table_destroy (regions_table);
2056 *parsed = output_buffers;