#include <string.h>
#include "gstssaparse.h"
+#include "gstsubparseelements.h"
+
GST_DEBUG_CATEGORY_STATIC (ssa_parse_debug);
+#undef GST_CAT_DEFAULT
#define GST_CAT_DEFAULT ssa_parse_debug
static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
#define gst_ssa_parse_parent_class parent_class
G_DEFINE_TYPE (GstSsaParse, gst_ssa_parse, GST_TYPE_ELEMENT);
+GST_ELEMENT_REGISTER_DEFINE_WITH_CODE (ssaparse, "ssaparse",
+ GST_RANK_PRIMARY, GST_TYPE_SSA_PARSE, sub_parse_element_init (plugin));
+
static GstStateChangeReturn gst_ssa_parse_change_state (GstElement *
element, GstStateChange transition);
#include <glib.h>
#include "gstsubparse.h"
+
#include "gstssaparse.h"
#include "samiparse.h"
#include "tmplayerparse.h"
#include "mpl2parse.h"
#include "qttextparse.h"
-
-GST_DEBUG_CATEGORY (sub_parse_debug);
+#include "gstsubparseelements.h"
#define DEFAULT_ENCODING NULL
#define ATTRIBUTE_REGEX "\\s?[a-zA-Z0-9\\. \t\\(\\)]*"
#define gst_sub_parse_parent_class parent_class
G_DEFINE_TYPE (GstSubParse, gst_sub_parse, GST_TYPE_ELEMENT);
-static void
-gst_sub_parse_dispose (GObject * object)
+GST_ELEMENT_REGISTER_DEFINE_WITH_CODE (subparse, "subparse",
+ GST_RANK_PRIMARY, GST_TYPE_SUBPARSE, sub_parse_element_init (plugin))
+
+
+ static void gst_sub_parse_dispose (GObject * object)
{
GstSubParse *subparse = GST_SUBPARSE (object);
return NULL;
}
-static gchar *
-gst_convert_to_utf8 (const gchar * str, gsize len, const gchar * encoding,
- gsize * consumed, GError ** err)
-{
- gchar *ret = NULL;
-
- *consumed = 0;
- /* The char cast is necessary in glib < 2.24 */
- ret =
- g_convert_with_fallback (str, len, "UTF-8", encoding, (char *) "*",
- consumed, NULL, err);
- if (ret == NULL)
- return ret;
-
- /* + 3 to skip UTF-8 BOM if it was added */
- len = strlen (ret);
- if (len >= 3 && (guint8) ret[0] == 0xEF && (guint8) ret[1] == 0xBB
- && (guint8) ret[2] == 0xBF)
- memmove (ret, ret + 3, len + 1 - 3);
-
- return ret;
-}
-
-static gchar *
-detect_encoding (const gchar * str, gsize len)
-{
- if (len >= 3 && (guint8) str[0] == 0xEF && (guint8) str[1] == 0xBB
- && (guint8) str[2] == 0xBF)
- return g_strdup ("UTF-8");
-
- if (len >= 2 && (guint8) str[0] == 0xFE && (guint8) str[1] == 0xFF)
- return g_strdup ("UTF-16BE");
-
- if (len >= 2 && (guint8) str[0] == 0xFF && (guint8) str[1] == 0xFE)
- return g_strdup ("UTF-16LE");
- if (len >= 4 && (guint8) str[0] == 0x00 && (guint8) str[1] == 0x00
- && (guint8) str[2] == 0xFE && (guint8) str[3] == 0xFF)
- return g_strdup ("UTF-32BE");
- if (len >= 4 && (guint8) str[0] == 0xFF && (guint8) str[1] == 0xFE
- && (guint8) str[2] == 0x00 && (guint8) str[3] == 0x00)
- return g_strdup ("UTF-32LE");
- return NULL;
-}
static gchar *
convert_encoding (GstSubParse * self, const gchar * str, gsize len,
/* First try any detected encoding */
if (self->detected_encoding) {
ret =
- gst_convert_to_utf8 (str, len, self->detected_encoding, consumed, &err);
+ gst_sub_parse_gst_convert_to_utf8 (str, len, self->detected_encoding,
+ consumed, &err);
if (!err)
return ret;
}
}
- ret = gst_convert_to_utf8 (str, len, encoding, consumed, &err);
+ ret = gst_sub_parse_gst_convert_to_utf8 (str, len, encoding, consumed, &err);
if (err) {
GST_WARNING_OBJECT (self, "could not convert string from '%s' to UTF-8: %s",
g_clear_error (&err);
/* invalid input encoding, fall back to ISO-8859-15 (always succeeds) */
- ret = gst_convert_to_utf8 (str, len, "ISO-8859-15", consumed, NULL);
+ ret =
+ gst_sub_parse_gst_convert_to_utf8 (str, len, "ISO-8859-15", consumed,
+ NULL);
}
GST_LOG_OBJECT (self,
state->allowed_tags = NULL;
}
-/* regex type enum */
-typedef enum
-{
- GST_SUB_PARSE_REGEX_UNKNOWN = 0,
- GST_SUB_PARSE_REGEX_MDVDSUB = 1,
- GST_SUB_PARSE_REGEX_SUBRIP = 2,
- GST_SUB_PARSE_REGEX_DKS = 3,
- GST_SUB_PARSE_REGEX_VTT = 4,
-} GstSubParseRegex;
-
-static gpointer
-gst_sub_parse_data_format_autodetect_regex_once (GstSubParseRegex regtype)
-{
- gpointer result = NULL;
- GError *gerr = NULL;
- switch (regtype) {
- case GST_SUB_PARSE_REGEX_MDVDSUB:
- result =
- (gpointer) g_regex_new ("^\\{[0-9]+\\}\\{[0-9]+\\}",
- G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &gerr);
- if (result == NULL) {
- g_warning ("Compilation of mdvd regex failed: %s", gerr->message);
- g_clear_error (&gerr);
- }
- break;
- case GST_SUB_PARSE_REGEX_SUBRIP:
- result = (gpointer)
- g_regex_new ("^[\\s\\n]*[\\n]? {0,3}[ 0-9]{1,4}\\s*(\x0d)?\x0a"
- " ?[0-9]{1,2}: ?[0-9]{1,2}: ?[0-9]{1,2}[,.] {0,2}[0-9]{1,3}"
- " +--> +[0-9]{1,2}: ?[0-9]{1,2}: ?[0-9]{1,2}[,.] {0,2}[0-9]{1,2}",
- G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &gerr);
- if (result == NULL) {
- g_warning ("Compilation of subrip regex failed: %s", gerr->message);
- g_clear_error (&gerr);
- }
- break;
- case GST_SUB_PARSE_REGEX_DKS:
- result = (gpointer) g_regex_new ("^\\[[0-9]+:[0-9]+:[0-9]+\\].*",
- G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &gerr);
- if (result == NULL) {
- g_warning ("Compilation of dks regex failed: %s", gerr->message);
- g_clear_error (&gerr);
- }
- break;
- case GST_SUB_PARSE_REGEX_VTT:
- result = (gpointer)
- g_regex_new ("^(\\xef\\xbb\\xbf)?WEBVTT[\\xa\\xd\\x20\\x9]", 0, 0,
- &gerr);
- if (result == NULL) {
- g_warning ("Compilation of vtt regex failed: %s", gerr->message);
- g_error_free (gerr);
- }
- break;
- default:
- GST_WARNING ("Trying to allocate regex of unknown type %u", regtype);
- }
- return result;
-}
-
-/*
- * FIXME: maybe we should pass along a second argument, the preceding
- * text buffer, because that is how this originally worked, even though
- * I don't really see the use of that.
- */
-
-static GstSubParseFormat
-gst_sub_parse_data_format_autodetect (gchar * match_str)
-{
- guint n1, n2, n3;
-
- static GOnce mdvd_rx_once = G_ONCE_INIT;
- static GOnce subrip_rx_once = G_ONCE_INIT;
- static GOnce dks_rx_once = G_ONCE_INIT;
- static GOnce vtt_rx_once = G_ONCE_INIT;
-
- GRegex *mdvd_grx;
- GRegex *subrip_grx;
- GRegex *dks_grx;
- GRegex *vtt_grx;
-
- g_once (&mdvd_rx_once,
- (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
- (gpointer) GST_SUB_PARSE_REGEX_MDVDSUB);
- g_once (&subrip_rx_once,
- (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
- (gpointer) GST_SUB_PARSE_REGEX_SUBRIP);
- g_once (&dks_rx_once,
- (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
- (gpointer) GST_SUB_PARSE_REGEX_DKS);
- g_once (&vtt_rx_once,
- (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
- (gpointer) GST_SUB_PARSE_REGEX_VTT);
-
- mdvd_grx = (GRegex *) mdvd_rx_once.retval;
- subrip_grx = (GRegex *) subrip_rx_once.retval;
- dks_grx = (GRegex *) dks_rx_once.retval;
- vtt_grx = (GRegex *) vtt_rx_once.retval;
-
- if (g_regex_match (mdvd_grx, match_str, 0, NULL)) {
- GST_LOG ("MicroDVD (frame based) format detected");
- return GST_SUB_PARSE_FORMAT_MDVDSUB;
- }
- if (g_regex_match (subrip_grx, match_str, 0, NULL)) {
- GST_LOG ("SubRip (time based) format detected");
- return GST_SUB_PARSE_FORMAT_SUBRIP;
- }
- if (g_regex_match (dks_grx, match_str, 0, NULL)) {
- GST_LOG ("DKS (time based) format detected");
- return GST_SUB_PARSE_FORMAT_DKS;
- }
- if (g_regex_match (vtt_grx, match_str, 0, NULL) == TRUE) {
- GST_LOG ("WebVTT (time based) format detected");
- return GST_SUB_PARSE_FORMAT_VTT;
- }
-
- if (!strncmp (match_str, "FORMAT=TIME", 11)) {
- GST_LOG ("MPSub (time based) format detected");
- return GST_SUB_PARSE_FORMAT_MPSUB;
- }
- if (strstr (match_str, "<SAMI>") != NULL ||
- strstr (match_str, "<sami>") != NULL) {
- GST_LOG ("SAMI (time based) format detected");
- return GST_SUB_PARSE_FORMAT_SAMI;
- }
- /* we're boldly assuming the first subtitle appears within the first hour */
- if (sscanf (match_str, "0:%02u:%02u:", &n1, &n2) == 2 ||
- sscanf (match_str, "0:%02u:%02u=", &n1, &n2) == 2 ||
- sscanf (match_str, "00:%02u:%02u:", &n1, &n2) == 2 ||
- sscanf (match_str, "00:%02u:%02u=", &n1, &n2) == 2 ||
- sscanf (match_str, "00:%02u:%02u,%u=", &n1, &n2, &n3) == 3) {
- GST_LOG ("TMPlayer (time based) format detected");
- return GST_SUB_PARSE_FORMAT_TMPLAYER;
- }
- if (sscanf (match_str, "[%u][%u]", &n1, &n2) == 2) {
- GST_LOG ("MPL2 (time based) format detected");
- return GST_SUB_PARSE_FORMAT_MPL2;
- }
- if (strstr (match_str, "[INFORMATION]") != NULL) {
- GST_LOG ("SubViewer (time based) format detected");
- return GST_SUB_PARSE_FORMAT_SUBVIEWER;
- }
- if (strstr (match_str, "{QTtext}") != NULL) {
- GST_LOG ("QTtext (time based) format detected");
- return GST_SUB_PARSE_FORMAT_QTTEXT;
- }
- /* We assume the LRC file starts immediately */
- if (match_str[0] == '[') {
- gboolean all_lines_good = TRUE;
- gchar **split;
- gchar **ptr;
-
- ptr = split = g_strsplit (match_str, "\n", -1);
- while (*ptr && *(ptr + 1)) {
- gchar *str = *ptr;
- gint len = strlen (str);
-
- if (sscanf (str, "[%u:%02u.%02u]", &n1, &n2, &n3) == 3 ||
- sscanf (str, "[%u:%02u.%03u]", &n1, &n2, &n3) == 3) {
- all_lines_good = TRUE;
- } else if (str[len - 1] == ']' && strchr (str, ':') != NULL) {
- all_lines_good = TRUE;
- } else {
- all_lines_good = FALSE;
- break;
- }
-
- ptr++;
- }
- g_strfreev (split);
-
- if (all_lines_good)
- return GST_SUB_PARSE_FORMAT_LRC;
- }
-
- GST_DEBUG ("no subtitle format detected");
- return GST_SUB_PARSE_FORMAT_UNKNOWN;
-}
static GstCaps *
gst_sub_parse_format_autodetect (GstSubParse * self)
GstMapInfo map;
gst_buffer_map (buf, &map, GST_MAP_READ);
- self->detected_encoding = detect_encoding ((gchar *) map.data, map.size);
+ self->detected_encoding =
+ gst_sub_parse_detect_encoding ((gchar *) map.data, map.size);
gst_buffer_unmap (buf, &map);
self->first_buffer = FALSE;
self->state.fps_n = self->fps_n;
return ret;
}
-
-/*
- * Typefind support.
- */
-
-/* FIXME 0.11: these caps are ugly, use app/x-subtitle + type field or so;
- * also, give different subtitle formats really different types */
-static GstStaticCaps mpl2_caps =
-GST_STATIC_CAPS ("application/x-subtitle-mpl2");
-#define SUB_CAPS (gst_static_caps_get (&sub_caps))
-
-static GstStaticCaps tmp_caps =
-GST_STATIC_CAPS ("application/x-subtitle-tmplayer");
-#define TMP_CAPS (gst_static_caps_get (&tmp_caps))
-
-static GstStaticCaps sub_caps = GST_STATIC_CAPS ("application/x-subtitle");
-#define MPL2_CAPS (gst_static_caps_get (&mpl2_caps))
-
-static GstStaticCaps smi_caps = GST_STATIC_CAPS ("application/x-subtitle-sami");
-#define SAMI_CAPS (gst_static_caps_get (&smi_caps))
-
-static GstStaticCaps dks_caps = GST_STATIC_CAPS ("application/x-subtitle-dks");
-#define DKS_CAPS (gst_static_caps_get (&dks_caps))
-
-static GstStaticCaps vtt_caps = GST_STATIC_CAPS ("application/x-subtitle-vtt");
-#define VTT_CAPS (gst_static_caps_get (&vtt_caps))
-
-static GstStaticCaps qttext_caps =
-GST_STATIC_CAPS ("application/x-subtitle-qttext");
-#define QTTEXT_CAPS (gst_static_caps_get (&qttext_caps))
-
-static GstStaticCaps lrc_caps = GST_STATIC_CAPS ("application/x-subtitle-lrc");
-#define LRC_CAPS (gst_static_caps_get (&lrc_caps))
-
-static void
-gst_subparse_type_find (GstTypeFind * tf, gpointer private)
-{
- GstSubParseFormat format;
- const guint8 *data;
- GstCaps *caps;
- gchar *str;
- gchar *encoding = NULL;
- const gchar *end;
-
- if (!(data = gst_type_find_peek (tf, 0, 129)))
- return;
-
- /* make sure string passed to _autodetect() is NUL-terminated */
- str = g_malloc0 (129);
- memcpy (str, data, 128);
-
- if ((encoding = detect_encoding (str, 128)) != NULL) {
- gchar *converted_str;
- GError *err = NULL;
- gsize tmp;
-
- converted_str = gst_convert_to_utf8 (str, 128, encoding, &tmp, &err);
- if (converted_str == NULL) {
- GST_DEBUG ("Encoding '%s' detected but conversion failed: %s", encoding,
- err->message);
- g_clear_error (&err);
- } else {
- g_free (str);
- str = converted_str;
- }
- g_free (encoding);
- }
-
- /* Check if at least the first 120 chars are valid UTF8,
- * otherwise convert as always */
- if (!g_utf8_validate (str, 128, &end) && (end - str) < 120) {
- gchar *converted_str;
- gsize tmp;
- const gchar *enc;
-
- enc = g_getenv ("GST_SUBTITLE_ENCODING");
- if (enc == NULL || *enc == '\0') {
- /* if local encoding is UTF-8 and no encoding specified
- * via the environment variable, assume ISO-8859-15 */
- if (g_get_charset (&enc)) {
- enc = "ISO-8859-15";
- }
- }
- converted_str = gst_convert_to_utf8 (str, 128, enc, &tmp, NULL);
- if (converted_str != NULL) {
- g_free (str);
- str = converted_str;
- }
- }
-
- format = gst_sub_parse_data_format_autodetect (str);
- g_free (str);
-
- switch (format) {
- case GST_SUB_PARSE_FORMAT_MDVDSUB:
- GST_DEBUG ("MicroDVD format detected");
- caps = SUB_CAPS;
- break;
- case GST_SUB_PARSE_FORMAT_SUBRIP:
- GST_DEBUG ("SubRip format detected");
- caps = SUB_CAPS;
- break;
- case GST_SUB_PARSE_FORMAT_MPSUB:
- GST_DEBUG ("MPSub format detected");
- caps = SUB_CAPS;
- break;
- case GST_SUB_PARSE_FORMAT_SAMI:
- GST_DEBUG ("SAMI (time-based) format detected");
- caps = SAMI_CAPS;
- break;
- case GST_SUB_PARSE_FORMAT_TMPLAYER:
- GST_DEBUG ("TMPlayer (time based) format detected");
- caps = TMP_CAPS;
- break;
- /* FIXME: our MPL2 typefinding is not really good enough to warrant
- * returning a high probability (however, since we registered our
- * typefinder here with a rank of MARGINAL we should pretty much only
- * be called if most other typefinders have already run */
- case GST_SUB_PARSE_FORMAT_MPL2:
- GST_DEBUG ("MPL2 (time based) format detected");
- caps = MPL2_CAPS;
- break;
- case GST_SUB_PARSE_FORMAT_SUBVIEWER:
- GST_DEBUG ("SubViewer format detected");
- caps = SUB_CAPS;
- break;
- case GST_SUB_PARSE_FORMAT_DKS:
- GST_DEBUG ("DKS format detected");
- caps = DKS_CAPS;
- break;
- case GST_SUB_PARSE_FORMAT_QTTEXT:
- GST_DEBUG ("QTtext format detected");
- caps = QTTEXT_CAPS;
- break;
- case GST_SUB_PARSE_FORMAT_LRC:
- GST_DEBUG ("LRC format detected");
- caps = LRC_CAPS;
- break;
- case GST_SUB_PARSE_FORMAT_VTT:
- GST_DEBUG ("WebVTT format detected");
- caps = VTT_CAPS;
- break;
- default:
- case GST_SUB_PARSE_FORMAT_UNKNOWN:
- GST_DEBUG ("no subtitle format detected");
- return;
- }
-
- /* if we're here, it's ok */
- gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, caps);
-}
-
-static gboolean
-plugin_init (GstPlugin * plugin)
-{
- GST_DEBUG_CATEGORY_INIT (sub_parse_debug, "subparse", 0, ".sub parser");
-
- if (!gst_type_find_register (plugin, "subparse_typefind", GST_RANK_MARGINAL,
- gst_subparse_type_find, "srt,sub,mpsub,mdvd,smi,txt,dks,vtt",
- SUB_CAPS, NULL, NULL))
- return FALSE;
-
- if (!gst_element_register (plugin, "subparse",
- GST_RANK_PRIMARY, GST_TYPE_SUBPARSE) ||
- !gst_element_register (plugin, "ssaparse",
- GST_RANK_PRIMARY, GST_TYPE_SSA_PARSE)) {
- return FALSE;
- }
-
- return TRUE;
-}
-
-GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
- GST_VERSION_MINOR,
- subparse,
- "Subtitle parsing",
- plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)
#include <gst/gst.h>
#include <gst/base/gstadapter.h>
-GST_DEBUG_CATEGORY_EXTERN (sub_parse_debug);
-#define GST_CAT_DEFAULT sub_parse_debug
+#include "gstsubparseelements.h"
G_BEGIN_DECLS
#define GST_TYPE_SUBPARSE (gst_sub_parse_get_type ())
G_DECLARE_FINAL_TYPE (GstSubParse, gst_sub_parse, GST, SUBPARSE, GstElement)
-/* format enum */
-typedef enum
-{
- GST_SUB_PARSE_FORMAT_UNKNOWN = 0,
- GST_SUB_PARSE_FORMAT_MDVDSUB = 1,
- GST_SUB_PARSE_FORMAT_SUBRIP = 2,
- GST_SUB_PARSE_FORMAT_MPSUB = 3,
- GST_SUB_PARSE_FORMAT_SAMI = 4,
- GST_SUB_PARSE_FORMAT_TMPLAYER = 5,
- GST_SUB_PARSE_FORMAT_MPL2 = 6,
- GST_SUB_PARSE_FORMAT_SUBVIEWER = 7,
- GST_SUB_PARSE_FORMAT_DKS = 8,
- GST_SUB_PARSE_FORMAT_QTTEXT = 9,
- GST_SUB_PARSE_FORMAT_LRC = 10,
- GST_SUB_PARSE_FORMAT_VTT = 11
-} GstSubParseFormat;
typedef struct {
int state;
--- /dev/null
+/* GStreamer
+ * Copyright (C) 2020 Huawei Technologies Co., Ltd.
+ * @Author: Stéphane Cerveau <scerveau@collabora.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "gstsubparseelements.h"
+
+GST_DEBUG_CATEGORY (sub_parse_debug);
+
+/* regex type enum */
+typedef enum
+{
+ GST_SUB_PARSE_REGEX_UNKNOWN = 0,
+ GST_SUB_PARSE_REGEX_MDVDSUB = 1,
+ GST_SUB_PARSE_REGEX_SUBRIP = 2,
+ GST_SUB_PARSE_REGEX_DKS = 3,
+ GST_SUB_PARSE_REGEX_VTT = 4,
+} GstSubParseRegex;
+
+/* GOnce initializer: compiles the format-detection regex selected by
+ * @regtype.
+ *
+ * Returns the compiled GRegex (as a gpointer), or NULL when the pattern
+ * fails to compile or @regtype is not one of the known regex types. Both
+ * failure cases are logged. */
+static gpointer
+gst_sub_parse_data_format_autodetect_regex_once (GstSubParseRegex regtype)
+{
+  GRegex *regex = NULL;
+  GError *error = NULL;
+
+  switch (regtype) {
+    case GST_SUB_PARSE_REGEX_MDVDSUB:
+      regex = g_regex_new ("^\\{[0-9]+\\}\\{[0-9]+\\}",
+          G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &error);
+      if (!regex) {
+        g_warning ("Compilation of mdvd regex failed: %s", error->message);
+        g_clear_error (&error);
+      }
+      break;
+    case GST_SUB_PARSE_REGEX_SUBRIP:
+      regex =
+          g_regex_new ("^[\\s\\n]*[\\n]? {0,3}[ 0-9]{1,4}\\s*(\x0d)?\x0a"
+          " ?[0-9]{1,2}: ?[0-9]{1,2}: ?[0-9]{1,2}[,.] {0,2}[0-9]{1,3}"
+          " +--> +[0-9]{1,2}: ?[0-9]{1,2}: ?[0-9]{1,2}[,.] {0,2}[0-9]{1,2}",
+          G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &error);
+      if (!regex) {
+        g_warning ("Compilation of subrip regex failed: %s", error->message);
+        g_clear_error (&error);
+      }
+      break;
+    case GST_SUB_PARSE_REGEX_DKS:
+      regex = g_regex_new ("^\\[[0-9]+:[0-9]+:[0-9]+\\].*",
+          G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &error);
+      if (!regex) {
+        g_warning ("Compilation of dks regex failed: %s", error->message);
+        g_clear_error (&error);
+      }
+      break;
+    case GST_SUB_PARSE_REGEX_VTT:
+      /* Unlike the other patterns this one is compiled without
+       * G_REGEX_RAW | G_REGEX_OPTIMIZE. */
+      regex =
+          g_regex_new ("^(\\xef\\xbb\\xbf)?WEBVTT[\\xa\\xd\\x20\\x9]", 0, 0,
+          &error);
+      if (!regex) {
+        g_warning ("Compilation of vtt regex failed: %s", error->message);
+        g_error_free (error);
+      }
+      break;
+
+    default:
+      GST_WARNING ("Trying to allocate regex of unknown type %u", regtype);
+      break;
+  }
+
+  return (gpointer) regex;
+}
+
+/*
+ * FIXME: maybe we should pass along a second argument, the preceding
+ * text buffer, because that is how this originally worked, even though
+ * I don't really see the use of that.
+ */
+
+/* Guesses the subtitle format from the beginning of a subtitle file.
+ *
+ * @match_str: NUL-terminated text to inspect.
+ *
+ * The probes run in a fixed order and the first one that matches wins:
+ * the MicroDVD, SubRip, DKS and WebVTT regexes, then the string/sscanf
+ * probes for MPSub, SAMI, TMPlayer, MPL2, SubViewer and QTtext, and
+ * finally the line-by-line LRC check.
+ *
+ * Returns: the detected format, or GST_SUB_PARSE_FORMAT_UNKNOWN when no
+ * probe matches.
+ */
+GstSubParseFormat
+gst_sub_parse_data_format_autodetect (gchar * match_str)
+{
+  guint n1, n2, n3;
+
+  /* Each regex is compiled once, on first use, and then kept for the
+   * lifetime of the process. */
+  static GOnce mdvd_rx_once = G_ONCE_INIT;
+  static GOnce subrip_rx_once = G_ONCE_INIT;
+  static GOnce dks_rx_once = G_ONCE_INIT;
+  static GOnce vtt_rx_once = G_ONCE_INIT;
+
+  GRegex *mdvd_grx;
+  GRegex *subrip_grx;
+  GRegex *dks_grx;
+  GRegex *vtt_grx;
+
+  g_once (&mdvd_rx_once,
+      (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
+      (gpointer) GST_SUB_PARSE_REGEX_MDVDSUB);
+  g_once (&subrip_rx_once,
+      (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
+      (gpointer) GST_SUB_PARSE_REGEX_SUBRIP);
+  g_once (&dks_rx_once,
+      (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
+      (gpointer) GST_SUB_PARSE_REGEX_DKS);
+  g_once (&vtt_rx_once,
+      (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
+      (gpointer) GST_SUB_PARSE_REGEX_VTT);
+
+  mdvd_grx = (GRegex *) mdvd_rx_once.retval;
+  subrip_grx = (GRegex *) subrip_rx_once.retval;
+  dks_grx = (GRegex *) dks_rx_once.retval;
+  vtt_grx = (GRegex *) vtt_rx_once.retval;
+
+  if (g_regex_match (mdvd_grx, match_str, 0, NULL)) {
+    GST_LOG ("MicroDVD (frame based) format detected");
+    return GST_SUB_PARSE_FORMAT_MDVDSUB;
+  }
+  if (g_regex_match (subrip_grx, match_str, 0, NULL)) {
+    GST_LOG ("SubRip (time based) format detected");
+    return GST_SUB_PARSE_FORMAT_SUBRIP;
+  }
+  if (g_regex_match (dks_grx, match_str, 0, NULL)) {
+    GST_LOG ("DKS (time based) format detected");
+    return GST_SUB_PARSE_FORMAT_DKS;
+  }
+  if (g_regex_match (vtt_grx, match_str, 0, NULL) == TRUE) {
+    GST_LOG ("WebVTT (time based) format detected");
+    return GST_SUB_PARSE_FORMAT_VTT;
+  }
+
+  if (!strncmp (match_str, "FORMAT=TIME", 11)) {
+    GST_LOG ("MPSub (time based) format detected");
+    return GST_SUB_PARSE_FORMAT_MPSUB;
+  }
+  if (strstr (match_str, "<SAMI>") != NULL ||
+      strstr (match_str, "<sami>") != NULL) {
+    GST_LOG ("SAMI (time based) format detected");
+    return GST_SUB_PARSE_FORMAT_SAMI;
+  }
+  /* we're boldly assuming the first subtitle appears within the first hour */
+  if (sscanf (match_str, "0:%02u:%02u:", &n1, &n2) == 2 ||
+      sscanf (match_str, "0:%02u:%02u=", &n1, &n2) == 2 ||
+      sscanf (match_str, "00:%02u:%02u:", &n1, &n2) == 2 ||
+      sscanf (match_str, "00:%02u:%02u=", &n1, &n2) == 2 ||
+      sscanf (match_str, "00:%02u:%02u,%u=", &n1, &n2, &n3) == 3) {
+    GST_LOG ("TMPlayer (time based) format detected");
+    return GST_SUB_PARSE_FORMAT_TMPLAYER;
+  }
+  if (sscanf (match_str, "[%u][%u]", &n1, &n2) == 2) {
+    GST_LOG ("MPL2 (time based) format detected");
+    return GST_SUB_PARSE_FORMAT_MPL2;
+  }
+  if (strstr (match_str, "[INFORMATION]") != NULL) {
+    GST_LOG ("SubViewer (time based) format detected");
+    return GST_SUB_PARSE_FORMAT_SUBVIEWER;
+  }
+  if (strstr (match_str, "{QTtext}") != NULL) {
+    GST_LOG ("QTtext (time based) format detected");
+    return GST_SUB_PARSE_FORMAT_QTTEXT;
+  }
+  /* We assume the LRC file starts immediately */
+  if (match_str[0] == '[') {
+    gboolean all_lines_good = TRUE;
+    gchar **split;
+    gchar **ptr;
+
+    ptr = split = g_strsplit (match_str, "\n", -1);
+    /* The last piece of the split is never examined: the loop stops as
+     * soon as there is no following piece. */
+    while (*ptr && *(ptr + 1)) {
+      gchar *str = *ptr;
+      gsize len = strlen (str);
+      gboolean has_timestamp;
+      gboolean looks_like_tag;
+
+      has_timestamp =
+          sscanf (str, "[%u:%02u.%02u]", &n1, &n2, &n3) == 3 ||
+          sscanf (str, "[%u:%02u.%03u]", &n1, &n2, &n3) == 3;
+      /* An empty line has no last character: check the length before
+       * indexing so that str[len - 1] never reads in front of the
+       * string. An empty line therefore counts as a bad line. */
+      looks_like_tag = len > 0 && str[len - 1] == ']'
+          && strchr (str, ':') != NULL;
+
+      if (!has_timestamp && !looks_like_tag) {
+        all_lines_good = FALSE;
+        break;
+      }
+
+      ptr++;
+    }
+    g_strfreev (split);
+
+    if (all_lines_good)
+      return GST_SUB_PARSE_FORMAT_LRC;
+  }
+
+  GST_DEBUG ("no subtitle format detected");
+  return GST_SUB_PARSE_FORMAT_UNKNOWN;
+}
+
+/* Converts @len bytes of @str from @encoding to UTF-8.
+ *
+ * The conversion is done with g_convert_with_fallback() using "*" as the
+ * fallback string. *@consumed is reset to 0 before the conversion and is
+ * then filled in by g_convert_with_fallback(); @err is passed through to
+ * it unchanged. A UTF-8 byte-order mark at the start of the result is
+ * removed.
+ *
+ * Returns: the converted string, or NULL if the conversion failed.
+ */
+gchar *
+gst_sub_parse_gst_convert_to_utf8 (const gchar * str, gsize len,
+    const gchar * encoding, gsize * consumed, GError ** err)
+{
+  static const guint8 utf8_bom[] = { 0xEF, 0xBB, 0xBF };
+  gchar *utf8;
+  gsize utf8_len;
+
+  *consumed = 0;
+  /* The char cast is necessary in glib < 2.24 */
+  utf8 = g_convert_with_fallback (str, len, "UTF-8", encoding, (char *) "*",
+      consumed, NULL, err);
+  if (utf8 == NULL)
+    return NULL;
+
+  /* Drop a leading BOM by shifting the rest of the string, terminating
+   * NUL included, to the front of the buffer. */
+  utf8_len = strlen (utf8);
+  if (utf8_len >= sizeof (utf8_bom)
+      && memcmp (utf8, utf8_bom, sizeof (utf8_bom)) == 0)
+    memmove (utf8, utf8 + sizeof (utf8_bom),
+        utf8_len + 1 - sizeof (utf8_bom));
+
+  return utf8;
+}
+
+/* Detects the text encoding from a byte-order mark at the start of @str.
+ *
+ * @str: the data to inspect (need not be NUL-terminated).
+ * @len: number of bytes available in @str.
+ *
+ * Returns: a g_strdup()'ed encoding name ("UTF-8", "UTF-16BE",
+ * "UTF-16LE", "UTF-32BE" or "UTF-32LE"), or NULL if @str does not start
+ * with a recognised byte-order mark.
+ */
+gchar *
+gst_sub_parse_detect_encoding (const gchar * str, gsize len)
+{
+  if (len >= 3 && (guint8) str[0] == 0xEF && (guint8) str[1] == 0xBB
+      && (guint8) str[2] == 0xBF)
+    return g_strdup ("UTF-8");
+
+  /* The 4-byte UTF-32 marks must be tested before the 2-byte UTF-16
+   * marks: the UTF-32LE mark (FF FE 00 00) begins with the UTF-16LE mark
+   * (FF FE), so testing UTF-16LE first would make UTF-32LE undetectable. */
+  if (len >= 4 && (guint8) str[0] == 0x00 && (guint8) str[1] == 0x00
+      && (guint8) str[2] == 0xFE && (guint8) str[3] == 0xFF)
+    return g_strdup ("UTF-32BE");
+
+  if (len >= 4 && (guint8) str[0] == 0xFF && (guint8) str[1] == 0xFE
+      && (guint8) str[2] == 0x00 && (guint8) str[3] == 0x00)
+    return g_strdup ("UTF-32LE");
+
+  if (len >= 2 && (guint8) str[0] == 0xFE && (guint8) str[1] == 0xFF)
+    return g_strdup ("UTF-16BE");
+
+  if (len >= 2 && (guint8) str[0] == 0xFF && (guint8) str[1] == 0xFE)
+    return g_strdup ("UTF-16LE");
+
+  return NULL;
+}
+
+/*
+ * Typefind support.
+ */
+
+/* FIXME 0.11: these caps are ugly, use app/x-subtitle + type field or so;
+ * also, give different subtitle formats really different types */
+/* One static caps description per media type suggested by the typefinder,
+ * each followed by the macro used to obtain caps from it. */
+static GstStaticCaps sub_caps = GST_STATIC_CAPS ("application/x-subtitle");
+#define SUB_CAPS (gst_static_caps_get (&sub_caps))
+
+static GstStaticCaps mpl2_caps =
+GST_STATIC_CAPS ("application/x-subtitle-mpl2");
+#define MPL2_CAPS (gst_static_caps_get (&mpl2_caps))
+
+static GstStaticCaps tmp_caps =
+GST_STATIC_CAPS ("application/x-subtitle-tmplayer");
+#define TMP_CAPS (gst_static_caps_get (&tmp_caps))
+
+static GstStaticCaps smi_caps = GST_STATIC_CAPS ("application/x-subtitle-sami");
+#define SAMI_CAPS (gst_static_caps_get (&smi_caps))
+
+static GstStaticCaps dks_caps = GST_STATIC_CAPS ("application/x-subtitle-dks");
+#define DKS_CAPS (gst_static_caps_get (&dks_caps))
+
+static GstStaticCaps vtt_caps = GST_STATIC_CAPS ("application/x-subtitle-vtt");
+#define VTT_CAPS (gst_static_caps_get (&vtt_caps))
+
+static GstStaticCaps qttext_caps =
+GST_STATIC_CAPS ("application/x-subtitle-qttext");
+#define QTTEXT_CAPS (gst_static_caps_get (&qttext_caps))
+
+static GstStaticCaps lrc_caps = GST_STATIC_CAPS ("application/x-subtitle-lrc");
+#define LRC_CAPS (gst_static_caps_get (&lrc_caps))
+
+/* Typefind function for subtitle files.
+ *
+ * @tf: the typefind context to peek data from and to suggest caps to.
+ * @private: unused.
+ *
+ * Peeks at the start of the stream, converts the first 128 bytes to UTF-8
+ * (using a byte-order mark if there is one, otherwise the encoding named
+ * by GST_SUBTITLE_ENCODING or a locale-based fallback), runs the format
+ * auto-detection on the result and, if a format is recognised, suggests
+ * the matching caps with GST_TYPE_FIND_MAXIMUM probability. Nothing is
+ * suggested when the data cannot be peeked or no format is recognised.
+ */
+static void
+gst_sub_parse_type_find (GstTypeFind * tf, gpointer private)
+{
+  GstSubParseFormat format;
+  const guint8 *data;
+  GstCaps *caps;
+  gchar *str;
+  /* Number of meaningful bytes in str; the peeked sample is 128 bytes but
+   * a converted copy is generally a different length. */
+  gsize str_len = 128;
+  gchar *encoding = NULL;
+  const gchar *end;
+
+  if (!(data = gst_type_find_peek (tf, 0, 129)))
+    return;
+
+  /* make sure string passed to _autodetect() is NUL-terminated */
+  str = g_malloc0 (129);
+  memcpy (str, data, 128);
+
+  if ((encoding = gst_sub_parse_detect_encoding (str, 128)) != NULL) {
+    gchar *converted_str;
+    GError *err = NULL;
+    gsize tmp;
+
+    converted_str =
+        gst_sub_parse_gst_convert_to_utf8 (str, 128, encoding, &tmp, &err);
+    if (converted_str == NULL) {
+      GST_DEBUG ("Encoding '%s' detected but conversion failed: %s", encoding,
+          err->message);
+      g_clear_error (&err);
+    } else {
+      g_free (str);
+      str = converted_str;
+      /* From here on only the converted text may be looked at: using the
+       * old length of 128 would make the validation below trip over the
+       * terminating NUL and the fallback conversion read past the end of
+       * the converted string. */
+      str_len = strlen (str);
+    }
+    g_free (encoding);
+  }
+
+  /* Check if at least the first 120 chars are valid UTF8,
+   * otherwise convert as always */
+  if (!g_utf8_validate (str, str_len, &end) && (end - str) < 120) {
+    gchar *converted_str;
+    gsize tmp;
+    const gchar *enc;
+
+    enc = g_getenv ("GST_SUBTITLE_ENCODING");
+    if (enc == NULL || *enc == '\0') {
+      /* if local encoding is UTF-8 and no encoding specified
+       * via the environment variable, assume ISO-8859-15 */
+      if (g_get_charset (&enc)) {
+        enc = "ISO-8859-15";
+      }
+    }
+    converted_str =
+        gst_sub_parse_gst_convert_to_utf8 (str, str_len, enc, &tmp, NULL);
+    if (converted_str != NULL) {
+      g_free (str);
+      str = converted_str;
+    }
+  }
+
+  format = gst_sub_parse_data_format_autodetect (str);
+  g_free (str);
+
+  switch (format) {
+    case GST_SUB_PARSE_FORMAT_MDVDSUB:
+      GST_DEBUG ("MicroDVD format detected");
+      caps = SUB_CAPS;
+      break;
+    case GST_SUB_PARSE_FORMAT_SUBRIP:
+      GST_DEBUG ("SubRip format detected");
+      caps = SUB_CAPS;
+      break;
+    case GST_SUB_PARSE_FORMAT_MPSUB:
+      GST_DEBUG ("MPSub format detected");
+      caps = SUB_CAPS;
+      break;
+    case GST_SUB_PARSE_FORMAT_SAMI:
+      GST_DEBUG ("SAMI (time-based) format detected");
+      caps = SAMI_CAPS;
+      break;
+    case GST_SUB_PARSE_FORMAT_TMPLAYER:
+      GST_DEBUG ("TMPlayer (time based) format detected");
+      caps = TMP_CAPS;
+      break;
+      /* FIXME: our MPL2 typefinding is not really good enough to warrant
+       * returning a high probability (however, since we registered our
+       * typefinder here with a rank of MARGINAL we should pretty much only
+       * be called if most other typefinders have already run */
+    case GST_SUB_PARSE_FORMAT_MPL2:
+      GST_DEBUG ("MPL2 (time based) format detected");
+      caps = MPL2_CAPS;
+      break;
+    case GST_SUB_PARSE_FORMAT_SUBVIEWER:
+      GST_DEBUG ("SubViewer format detected");
+      caps = SUB_CAPS;
+      break;
+    case GST_SUB_PARSE_FORMAT_DKS:
+      GST_DEBUG ("DKS format detected");
+      caps = DKS_CAPS;
+      break;
+    case GST_SUB_PARSE_FORMAT_QTTEXT:
+      GST_DEBUG ("QTtext format detected");
+      caps = QTTEXT_CAPS;
+      break;
+    case GST_SUB_PARSE_FORMAT_LRC:
+      GST_DEBUG ("LRC format detected");
+      caps = LRC_CAPS;
+      break;
+    case GST_SUB_PARSE_FORMAT_VTT:
+      GST_DEBUG ("WebVTT format detected");
+      caps = VTT_CAPS;
+      break;
+    default:
+    case GST_SUB_PARSE_FORMAT_UNKNOWN:
+      GST_DEBUG ("no subtitle format detected");
+      return;
+  }
+
+  /* if we're here, it's ok */
+  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, caps);
+}
+
+GST_TYPE_FIND_REGISTER_DEFINE (subparse, "subparse_typefind", GST_RANK_MARGINAL,
+ gst_sub_parse_type_find, "srt,sub,mpsub,mdvd,smi,txt,dks,vtt", SUB_CAPS,
+ NULL, NULL)
+
+/* One-time initialisation shared by the elements of this plugin: sets up
+ * the "subparse" debug category and registers the subtitle typefinder.
+ * The work is done on the first call only; later calls return at once.
+ *
+ * Returns: always TRUE.
+ */
+gboolean
+sub_parse_element_init (GstPlugin * plugin)
+{
+  static gsize initialized = 0;
+
+  if (!g_once_init_enter (&initialized))
+    return TRUE;
+
+  GST_DEBUG_CATEGORY_INIT (sub_parse_debug, "subparse", 0, ".sub parser");
+
+  /* NOTE(review): the outcome of the typefinder registration does not
+   * influence the return value - confirm that a failure here is really
+   * meant to go unreported. */
+  (void) GST_TYPE_FIND_REGISTER (subparse, plugin);
+
+  g_once_init_leave (&initialized, 1);
+
+  return TRUE;
+}
--- /dev/null
+/* GStreamer
+ * Copyright (C) <2002> David A. Schleef <ds@schleef.org>
+ * Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
+ * Copyright (C) 2020 Huawei Technologies Co., Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free
+ * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __GST_SUBPARSE_ELEMENT_H__
+#define __GST_SUBPARSE_ELEMENT_H__
+
+#include <gst/gst.h>
+
+/* Subtitle formats reported by gst_sub_parse_data_format_autodetect().
+ * GST_SUB_PARSE_FORMAT_UNKNOWN means that no format was recognised. */
+typedef enum
+{
+ GST_SUB_PARSE_FORMAT_UNKNOWN = 0,
+ GST_SUB_PARSE_FORMAT_MDVDSUB = 1,     /* MicroDVD (frame based) */
+ GST_SUB_PARSE_FORMAT_SUBRIP = 2,      /* SubRip */
+ GST_SUB_PARSE_FORMAT_MPSUB = 3,       /* MPSub */
+ GST_SUB_PARSE_FORMAT_SAMI = 4,        /* SAMI */
+ GST_SUB_PARSE_FORMAT_TMPLAYER = 5,    /* TMPlayer */
+ GST_SUB_PARSE_FORMAT_MPL2 = 6,        /* MPL2 */
+ GST_SUB_PARSE_FORMAT_SUBVIEWER = 7,   /* SubViewer */
+ GST_SUB_PARSE_FORMAT_DKS = 8,         /* DKS */
+ GST_SUB_PARSE_FORMAT_QTTEXT = 9,      /* QTtext */
+ GST_SUB_PARSE_FORMAT_LRC = 10,        /* LRC */
+ GST_SUB_PARSE_FORMAT_VTT = 11         /* WebVTT */
+} GstSubParseFormat;
+
+
+/* Helpers shared by the subparse sources; G_GNUC_INTERNAL keeps them out
+ * of the exported symbols. */
+
+/* Guesses the subtitle format of the NUL-terminated text in match_str. */
+G_GNUC_INTERNAL GstSubParseFormat gst_sub_parse_data_format_autodetect (gchar * match_str);
+/* Returns a newly allocated encoding name if str starts with a known
+ * byte-order mark, NULL otherwise. */
+G_GNUC_INTERNAL gchar * gst_sub_parse_detect_encoding (const gchar * str, gsize len);
+/* Converts len bytes of str from encoding to UTF-8 and strips a leading
+ * UTF-8 byte-order mark; returns NULL if the conversion fails. */
+G_GNUC_INTERNAL gchar * gst_sub_parse_gst_convert_to_utf8 (const gchar * str, gsize len, const gchar * encoding,
+ gsize * consumed, GError ** err);
+/* One-time setup of the debug category and of the subtitle typefinder. */
+G_GNUC_INTERNAL gboolean sub_parse_element_init (GstPlugin * plugin);
+
+GST_ELEMENT_REGISTER_DECLARE (subparse);
+GST_ELEMENT_REGISTER_DECLARE (ssaparse);
+
+GST_TYPE_FIND_REGISTER_DECLARE (subparse);
+
+GST_DEBUG_CATEGORY_EXTERN (sub_parse_debug);
+#define GST_CAT_DEFAULT sub_parse_debug
+
+#endif /* __GST_SUBPARSE_ELEMENT_H__ */
--- /dev/null
+/* GStreamer
+ * Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
+ * Copyright (C) 2004 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
+ * Copyright (C) 2006 Tim-Philipp Müller <tim centricular net>
+ * Copyright (C) 2016 Philippe Normand <pnormand@igalia.com>
+ * Copyright (C) 2016 Jan Schmidt <jan@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "gstsubparseelements.h"
+
+/* Plugin entry point: registers the subparse and ssaparse elements.
+ * Both registrations are always attempted; the result is TRUE as soon as
+ * at least one of them succeeds. */
+static gboolean
+plugin_init (GstPlugin * plugin)
+{
+  gboolean subparse_ok = GST_ELEMENT_REGISTER (subparse, plugin);
+  gboolean ssaparse_ok = GST_ELEMENT_REGISTER (ssaparse, plugin);
+
+  return subparse_ok | ssaparse_ok;
+}
+
+GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
+ GST_VERSION_MINOR,
+ subparse,
+ "Subtitle parsing",
+ plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)
subparse_sources = [
'gstssaparse.c',
'gstsubparse.c',
+ 'gstsubparseelement.c',
+ 'gstsubparseplugin.c',
'samiparse.c',
'tmplayerparse.c',
'mpl2parse.c',