From f9cc8757c9a1ba59b3b201d0816873c9108e2a29 Mon Sep 17 00:00:00 2001
From: Mathieu Duponchelle
Date: Fri, 20 Mar 2020 19:09:17 +0100
Subject: [PATCH] subparse: convert from pango-markup to utf8

.. when downstream requires it

Change-Id: I855f284401d8f5abbc4a1b1351f541c9883c92e4
---
Review notes (this section is not part of the commit message):
 * handle_buffer(): when strip_pango_markup() returns NULL, the warning
   reads error->message unconditionally and the GError is never freed.
   strip_pango_markup() only fills *error on a parse failure, so a NULL
   result with *error still NULL looks possible whenever xml_text() is
   never invoked (markup without character data) - worth a follow-up.
 * gst_sub_parse_negotiate() hands the result of
   gst_pad_get_allowed_caps() to gst_caps_intersect()/gst_caps_fixate()
   without a NULL check - TODO confirm the src pad is always linked here.
 * xml_text() ignores text_len and treats text as NUL-terminated - TODO
   confirm against the GMarkupParser.text contract.

 gst/subparse/gstsubparse.c      | 131 +++++++++++++++++++++++++++++++++++++++-
 gst/subparse/gstsubparse.h      |   4 +-
 tests/check/elements/subparse.c |  35 +++++++++++
 3 files changed, 168 insertions(+), 2 deletions(-)

diff --git a/gst/subparse/gstsubparse.c b/gst/subparse/gstsubparse.c
index c702a33..a046840 100644
--- a/gst/subparse/gstsubparse.c
+++ b/gst/subparse/gstsubparse.c
@@ -206,6 +206,9 @@ gst_sub_parse_init (GstSubParse * subparse)
 
   subparse->textbuf = g_string_new (NULL);
   subparse->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
+#ifdef TIZEN_FEATURE_UPSTREAM
+  subparse->strip_pango_markup = FALSE;
+#endif
   subparse->flushing = FALSE;
   gst_segment_init (&subparse->segment, GST_FORMAT_TIME);
   subparse->need_segment = TRUE;
@@ -1876,11 +1879,101 @@ feed_textbuf (GstSubParse * self, GstBuffer * buf)
   g_free (input);
 }
 
+#ifdef TIZEN_FEATURE_UPSTREAM
+static void
+xml_text (GMarkupParseContext * context,
+    const gchar * text, gsize text_len, gpointer user_data, GError ** error)
+{
+  gchar **accum = (gchar **) user_data;
+  gchar *concat;
+
+  if (*accum) {
+    concat = g_strconcat (*accum, text, NULL);
+    g_free (*accum);
+    *accum = concat;
+  } else {
+    *accum = g_strdup (text);
+  }
+}
+
+static gchar *
+strip_pango_markup (gchar * markup, GError ** error)
+{
+  GMarkupParser parser = { 0, };
+  GMarkupParseContext *context;
+  gchar *accum = NULL;
+
+  parser.text = xml_text;
+  context = g_markup_parse_context_new (&parser, 0, &accum, NULL);
+
+  g_markup_parse_context_parse (context, "<root>", 6, NULL);
+  g_markup_parse_context_parse (context, markup, strlen (markup), error);
+  g_markup_parse_context_parse (context, "</root>", 7, NULL);
+  if (*error)
+    goto error;
+
+  g_markup_parse_context_end_parse (context, error);
+  if (*error)
+    goto error;
+
+done:
+  g_markup_parse_context_free (context);
+  return accum;
+
+error:
+  g_free (accum);
+  accum = NULL;
+  goto done;
+}
+
+static gboolean
+gst_sub_parse_negotiate (GstSubParse * self, GstCaps * preferred)
+{
+  GstCaps *caps;
+  gboolean ret = FALSE;
+  const GstStructure *s1, *s2;
+
+  caps = gst_pad_get_allowed_caps (self->srcpad);
+
+  s1 = gst_caps_get_structure (preferred, 0);
+
+  if (!g_strcmp0 (gst_structure_get_string (s1, "format"), "utf8")) {
+    GstCaps *intersected = gst_caps_intersect (caps, preferred);
+    gst_caps_unref (caps);
+    caps = intersected;
+  }
+
+  caps = gst_caps_fixate (caps);
+
+  if (gst_caps_is_empty (caps)) {
+    goto done;
+  }
+
+  s2 = gst_caps_get_structure (caps, 0);
+
+  self->strip_pango_markup =
+      !g_strcmp0 (gst_structure_get_string (s2, "format"), "utf8")
+      && !g_strcmp0 (gst_structure_get_string (s1, "format"), "pango-markup");
+
+  if (self->strip_pango_markup) {
+    GST_INFO_OBJECT (self, "We will convert from pango-markup to utf8");
+  }
+
+  ret = gst_pad_set_caps (self->srcpad, caps);
+
+done:
+  gst_caps_unref (caps);
+  return ret;
+}
+#endif
+
 static GstFlowReturn
 handle_buffer (GstSubParse * self, GstBuffer * buf)
 {
   GstFlowReturn ret = GST_FLOW_OK;
+#ifndef TIZEN_FEATURE_UPSTREAM
   GstCaps *caps = NULL;
+#endif
   gchar *line, *subtitle;
   gboolean need_tags = FALSE;
 #ifdef TIZEN_FEATURE_SUBPARSE_MODIFICATION
@@ -1902,6 +1995,20 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
 
   /* make sure we know the format */
   if (G_UNLIKELY (self->parser_type == GST_SUB_PARSE_FORMAT_UNKNOWN)) {
+#ifdef TIZEN_FEATURE_UPSTREAM
+    GstCaps *preferred;
+
+    if (!(preferred = gst_sub_parse_format_autodetect (self))) {
+      return GST_FLOW_NOT_NEGOTIATED;
+    }
+
+    if (!gst_sub_parse_negotiate (self, preferred)) {
+      gst_caps_unref (preferred);
+      return GST_FLOW_NOT_NEGOTIATED;
+    }
+
+    gst_caps_unref (preferred);
+#else
     if (!(caps = gst_sub_parse_format_autodetect (self))) {
       return GST_FLOW_EOS;
     }
@@ -1910,6 +2017,7 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
       return GST_FLOW_EOS;
     }
     gst_caps_unref (caps);
+#endif
 
     need_tags = TRUE;
   }
@@ -1960,8 +2068,26 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
     }
 #endif
     if (subtitle) {
-      guint subtitle_len = strlen (subtitle);
+#ifdef TIZEN_FEATURE_UPSTREAM
+      guint subtitle_len;
+
+      if (self->strip_pango_markup) {
+        GError *error = NULL;
+        gchar *stripped;
+
+        if ((stripped = strip_pango_markup (subtitle, &error))) {
+          g_free (subtitle);
+          subtitle = stripped;
+        } else {
+          GST_WARNING_OBJECT (self, "Failed to strip pango markup: %s",
+              error->message);
+        }
+      }
 
+      subtitle_len = strlen (subtitle);
+#else
+      guint subtitle_len = strlen (subtitle);
+#endif
       /* +1 for terminating NUL character */
       buf = gst_buffer_new_and_alloc (subtitle_len + 1);
 
@@ -2137,6 +2263,9 @@ gst_sub_parse_change_state (GstElement * element, GstStateChange transition)
       /* format detection will init the parser state */
       self->offset = 0;
       self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
+#ifdef TIZEN_FEATURE_UPSTREAM
+      self->strip_pango_markup = FALSE;
+#endif
       self->valid_utf8 = TRUE;
       self->first_buffer = TRUE;
       g_free (self->detected_encoding);
diff --git a/gst/subparse/gstsubparse.h b/gst/subparse/gstsubparse.h
index af7520e..59f6d75 100644
--- a/gst/subparse/gstsubparse.h
+++ b/gst/subparse/gstsubparse.h
@@ -118,7 +118,9 @@ struct _GstSubParse {
   gboolean valid_utf8;
   gchar *detected_encoding;
   gchar *encoding;
-
+#ifdef TIZEN_FEATURE_UPSTREAM
+  gboolean strip_pango_markup;
+#endif
   gboolean first_buffer;
 
   /* used by frame based parsers */
diff --git a/tests/check/elements/subparse.c b/tests/check/elements/subparse.c
index e6c75e0..c20ee3e 100644
--- a/tests/check/elements/subparse.c
+++ b/tests/check/elements/subparse.c
@@ -22,6 +22,9 @@
 #endif
 
 #include <gst/check/gstcheck.h>
+#ifdef TIZEN_FEATURE_UPSTREAM
+#include <gst/check/gstharness.h>
+#endif
 
 #include <string.h>
 
@@ -1036,6 +1039,35 @@ GST_START_TEST (test_lrc)
 
 GST_END_TEST;
 
+#ifdef TIZEN_FEATURE_UPSTREAM
+GST_START_TEST (test_raw_conversion)
+{
+  GstHarness *h;
+  GstBuffer *buffer;
+  GstMapInfo map;
+
+  h = gst_harness_new ("subparse");
+
+  gst_harness_set_src_caps_str (h, "application/x-subtitle");
+  gst_harness_set_sink_caps_str (h, "text/x-raw, format=utf8");
+
+  buffer = buffer_from_static_string (srt_input[5].in);
+
+  buffer = gst_harness_push_and_pull (h, buffer);
+
+  gst_buffer_map (buffer, &map, GST_MAP_READ);
+  fail_unless_equals_int (map.size, 3);
+  fail_unless_equals_string ((gchar *) map.data, "Six");
+  gst_buffer_unmap (buffer, &map);
+
+  gst_clear_buffer (&buffer);
+
+  gst_harness_teardown (h);
+}
+
+GST_END_TEST;
+#endif
+
 /* TODO:
  * - add/modify tests so that lines aren't dogfed to the parsers in complete
  *   lines or sets of complete lines, but rather in random chunks
@@ -1071,6 +1103,9 @@ subparse_suite (void)
   tcase_add_test (tc_chain, test_sami_bad_entities);
   tcase_add_test (tc_chain, test_sami_comment);
   tcase_add_test (tc_chain, test_lrc);
+#ifdef TIZEN_FEATURE_UPSTREAM
+  tcase_add_test (tc_chain, test_raw_conversion);
+#endif
   return s;
 }
 
-- 
2.7.4