From e0cbb1036cc61dad84687854ab85064226edd449 Mon Sep 17 00:00:00 2001 From: Young-Ho Cha Date: Mon, 15 May 2006 09:14:35 +0000 Subject: [PATCH] gst/subparse/: Add support for SAMI subtitles (#169936). Original commit message from CVS: Patch by: Young-Ho Cha * gst/subparse/Makefile.am: * gst/subparse/gstsubparse.c: (gst_sub_parse_dispose), (parser_state_dispose), (gst_sub_parse_data_format_autodetect), (gst_sub_parse_format_autodetect), (feed_textbuf), (gst_subparse_type_find), (plugin_init): * gst/subparse/gstsubparse.h: * gst/subparse/samiparse.c: * gst/subparse/samiparse.h: Add support for SAMI subtitles (#169936). --- ChangeLog | 14 ++ gst/subparse/Makefile.am | 9 +- gst/subparse/gstsubparse.c | 47 +++++- gst/subparse/gstsubparse.h | 8 +- gst/subparse/samiparse.c | 396 +++++++++++++++++++++++++++++++++++++++++++++ gst/subparse/samiparse.h | 38 +++++ 6 files changed, 500 insertions(+), 12 deletions(-) create mode 100644 gst/subparse/samiparse.c create mode 100644 gst/subparse/samiparse.h diff --git a/ChangeLog b/ChangeLog index 8fdd537..c8b3ee5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2006-05-15 Tim-Philipp Müller + + Patch by: Young-Ho Cha + + * gst/subparse/Makefile.am: + * gst/subparse/gstsubparse.c: (gst_sub_parse_dispose), + (parser_state_dispose), (gst_sub_parse_data_format_autodetect), + (gst_sub_parse_format_autodetect), (feed_textbuf), + (gst_subparse_type_find), (plugin_init): + * gst/subparse/gstsubparse.h: + * gst/subparse/samiparse.c: + * gst/subparse/samiparse.h: + Add support for SAMI subtitles (#169936). 
+ 2006-05-14 Michael Smith * gst/audioconvert/gstchannelmix.c: (gst_channel_mix_fill_others): diff --git a/gst/subparse/Makefile.am b/gst/subparse/Makefile.am index 6a4c4e3..6726ae0 100644 --- a/gst/subparse/Makefile.am +++ b/gst/subparse/Makefile.am @@ -2,7 +2,11 @@ plugin_LTLIBRARIES = libgstsubparse.la libgstsubparse_la_SOURCES = \ gstssaparse.c \ - gstsubparse.c + gstssaparse.h \ + gstsubparse.c \ + gstsubparse.h \ + samiparse.c \ + samiparse.h libgstsubparse_la_CFLAGS = $(GST_CFLAGS) libgstsubparse_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS) @@ -10,4 +14,5 @@ libgstsubparse_la_LIBADD = $(GST_LIBS) noinst_HEADERS = \ gstssaparse.h \ - gstsubparse.h + gstsubparse.h \ + samiparse.h diff --git a/gst/subparse/gstsubparse.c b/gst/subparse/gstsubparse.c index 60ad39f..8e2f6bf 100644 --- a/gst/subparse/gstsubparse.c +++ b/gst/subparse/gstsubparse.c @@ -29,6 +29,7 @@ #include "gstsubparse.h" #include "gstssaparse.h" +#include "samiparse.h" GST_DEBUG_CATEGORY_STATIC (sub_parse_debug); #define GST_CAT_DEFAULT sub_parse_debug @@ -40,11 +41,19 @@ GST_ELEMENT_DETAILS ("Subtitle parser", "Gustavo J. A. M. Carneiro \n" "Ronald S. 
Bultje "); +#ifndef GST_DISABLE_LOADSAVE_REGISTRY +static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink", + GST_PAD_SINK, + GST_PAD_ALWAYS, + GST_STATIC_CAPS ("application/x-subtitle; application/x-subtitle-sami") + ); +#else static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS ("application/x-subtitle") ); +#endif static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src", GST_PAD_SRC, @@ -114,6 +123,7 @@ gst_sub_parse_dispose (GObject * object) gst_segment_free (subparse->segment); subparse->segment = NULL; } + sami_context_deinit (&subparse->state); GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object)); } @@ -569,6 +579,9 @@ parser_state_dispose (ParserState * state) g_string_free (state->buf, TRUE); state->buf = NULL; } + if (state->user_data) { + sami_context_reset (state); + } } /* @@ -602,19 +615,23 @@ gst_sub_parse_data_format_autodetect (gchar * match_str) } if (regexec (&mdvd_rx, match_str, 0, NULL, 0) == 0) { - GST_LOG ("subparse: MicroDVD (frame based) format detected"); + GST_LOG ("MicroDVD (frame based) format detected"); return GST_SUB_PARSE_FORMAT_MDVDSUB; } if (regexec (&subrip_rx, match_str, 0, NULL, 0) == 0) { - GST_LOG ("subparse: SubRip (time based) format detected"); + GST_LOG ("SubRip (time based) format detected"); return GST_SUB_PARSE_FORMAT_SUBRIP; } if (!strncmp (match_str, "FORMAT=TIME", 11)) { - GST_LOG ("subparse: MPSub (time based) format detected"); + GST_LOG ("MPSub (time based) format detected"); return GST_SUB_PARSE_FORMAT_MPSUB; } + if (!g_ascii_strncasecmp (match_str, "", 6)) { + GST_LOG ("SAMI (time based) format detected"); + return GST_SUB_PARSE_FORMAT_SAMI; + } - GST_WARNING ("subparse: subtitle format autodetection failed!"); + GST_DEBUG ("no subtitle format detected"); return GST_SUB_PARSE_FORMAT_UNKNOWN; } @@ -646,6 +663,10 @@ gst_sub_parse_format_autodetect (GstSubParse * self) case GST_SUB_PARSE_FORMAT_MPSUB: self->parse_line 
= parse_mpsub; return gst_caps_new_simple ("text/plain", NULL); + case GST_SUB_PARSE_FORMAT_SAMI: + self->parse_line = parse_sami; + sami_context_init (&self->state); + return gst_caps_new_simple ("text/x-pango-markup", NULL); case GST_SUB_PARSE_FORMAT_UNKNOWN: default: GST_DEBUG ("no subtitle format detected"); @@ -662,6 +683,7 @@ feed_textbuf (GstSubParse * self, GstBuffer * buf) /* flush the parser state */ parser_state_init (&self->state); g_string_truncate (self->textbuf, 0); + sami_context_reset (&self->state); } self->textbuf = g_string_append_len (self->textbuf, @@ -866,15 +888,18 @@ gst_sub_parse_change_state (GstElement * element, GstStateChange transition) * Typefind support. */ +static GstStaticCaps smi_caps = GST_STATIC_CAPS ("application/x-subtitle-sami"); static GstStaticCaps sub_caps = GST_STATIC_CAPS ("application/x-subtitle"); #define SUB_CAPS (gst_static_caps_get (&sub_caps)) +#define SAMI_CAPS (gst_static_caps_get (&smi_caps)) static void gst_subparse_type_find (GstTypeFind * tf, gpointer private) { - const guint8 *data; GstSubParseFormat format; + const guint8 *data; + GstCaps *caps; gchar *str; if (!(data = gst_type_find_peek (tf, 0, 36))) @@ -888,26 +913,34 @@ gst_subparse_type_find (GstTypeFind * tf, gpointer private) switch (format) { case GST_SUB_PARSE_FORMAT_MDVDSUB: GST_DEBUG ("MicroDVD format detected"); + caps = SUB_CAPS; break; case GST_SUB_PARSE_FORMAT_SUBRIP: GST_DEBUG ("SubRip format detected"); + caps = SUB_CAPS; break; case GST_SUB_PARSE_FORMAT_MPSUB: GST_DEBUG ("MPSub format detected"); + caps = SUB_CAPS; + break; + case GST_SUB_PARSE_FORMAT_SAMI: + GST_DEBUG ("SAMI (time-based) format detected"); + caps = SAMI_CAPS; break; + default: case GST_SUB_PARSE_FORMAT_UNKNOWN: GST_DEBUG ("no subtitle format detected"); return; } /* if we're here, it's ok */ - gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SUB_CAPS); + gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, caps); } static gboolean plugin_init (GstPlugin * plugin) { - 
static gchar *sub_exts[] = { "srt", "sub", "mpsub", "mdvd", NULL }; + static gchar *sub_exts[] = { "srt", "sub", "mpsub", "mdvd", "smi", NULL }; GST_DEBUG_CATEGORY_INIT (sub_parse_debug, "subparse", 0, ".sub parser"); diff --git a/gst/subparse/gstsubparse.h b/gst/subparse/gstsubparse.h index 6757aad..b057d40 100644 --- a/gst/subparse/gstsubparse.h +++ b/gst/subparse/gstsubparse.h @@ -30,10 +30,10 @@ G_BEGIN_DECLS #define GST_SUBPARSE(obj) \ (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_SUBPARSE, GstSubParse)) #define GST_SUBPARSE_CLASS(klass) \ - (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_SUBPARSE, GstSubParse)) + (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_SUBPARSE, GstSubParseClass)) #define GST_IS_SUBPARSE(obj) \ (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_SUBPARSE)) -#define GST_IS_SUBPARSE_CLASS(obj) \ +#define GST_IS_SUBPARSE_CLASS(klass) \ (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_SUBPARSE)) typedef struct _GstSubParse GstSubParse; @@ -45,7 +45,8 @@ typedef enum GST_SUB_PARSE_FORMAT_UNKNOWN = 0, GST_SUB_PARSE_FORMAT_MDVDSUB = 1, GST_SUB_PARSE_FORMAT_SUBRIP = 2, - GST_SUB_PARSE_FORMAT_MPSUB = 3 + GST_SUB_PARSE_FORMAT_MPSUB = 3, + GST_SUB_PARSE_FORMAT_SAMI = 4 } GstSubParseFormat; typedef struct { @@ -54,6 +55,7 @@ typedef struct { guint64 start_time; guint64 duration; GstSegment *segment; + gpointer user_data; } ParserState; typedef gchar* (*Parser) (ParserState *state, const gchar *line); diff --git a/gst/subparse/samiparse.c b/gst/subparse/samiparse.c new file mode 100644 index 0000000..4068a27 --- /dev/null +++ b/gst/subparse/samiparse.c @@ -0,0 +1,396 @@ +/* GStreamer SAMI subtitle parser + * Copyright (c) 2006 Young-Ho Cha + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include "samiparse.h" + +/* FIXME: use Makefile stuff */ +#ifndef GST_DISABLE_LOADSAVE_REGISTRY +#include +#include + +#define ITALIC_TAG 'i' +#define COLOR_TAG 'c' +#define RUBY_TAG 'r' +#define RT_TAG 't' +#define CLEAR_TAG '0' + +typedef struct _GstSamiContext GstSamiContext; + +struct _GstSamiContext +{ + GString *buf; /* buffer to collect content */ + GString *rubybuf; /* buffer to collect ruby content */ + GString *resultbuf; /* when the next 'sync' tag opens, content + * is moved here from 'buf' so that following + * content is not appended to it */ + GString *state; /* in many sami files there are tags that + * are not closed, so for each open tag the + * parser will append a tag flag here so + * that tags can be closed properly on + * 'sync' tags. See sami_context_push_state() + * and sami_context_pop_state(). 
*/ + htmlParserCtxtPtr htmlctxt; /* html parser context */ + gboolean has_result; /* set when ready to push out result */ + gboolean in_title; /* flag to avoid appending the title content + * to buf */ + guint64 time1; /* previous start attribute in sync tag */ + guint64 time2; /* current start attribute in sync tag */ +}; + +static gchar * +has_tag (GString * str, const gchar tag) +{ + return strrchr (str->str, tag); +} + +static void +sami_context_push_state (GstSamiContext * sctx, char state) +{ + g_string_append_c (sctx->state, state); +} + +static void +sami_context_pop_state (GstSamiContext * sctx, char state) +{ + GString *str = g_string_new (""); + GString *context_state = sctx->state; + int i; + + for (i = context_state->len - 1; i >= 0; i--) { + switch (context_state->str[i]) { + case ITALIC_TAG: /* */ + { + g_string_append (str, ""); + break; + } + case COLOR_TAG: /* */ + { + g_string_append (str, ""); + break; + } + case RUBY_TAG: /* -- ruby */ + { + break; + } + case RT_TAG: /* ruby */ + { + /* FIXME: support for furigana/ruby once implemented in pango */ + g_string_append (sctx->rubybuf, ""); + if (has_tag (context_state, ITALIC_TAG)) { + g_string_append (sctx->rubybuf, ""); + } + + break; + } + default: + break; + } + if (context_state->str[i] == state) { + g_string_append (sctx->buf, str->str); + g_string_free (str, TRUE); + g_string_truncate (context_state, i); + return; + } + } + if (state == CLEAR_TAG) { + g_string_append (sctx->buf, str->str); + g_string_truncate (context_state, 0); + } + g_string_free (str, TRUE); +} + +static void +handle_start_sync (GstSamiContext * sctx, const xmlChar ** atts) +{ + int i; + + sami_context_pop_state (sctx, CLEAR_TAG); + if (atts != NULL) { + for (i = 0; (atts[i] != NULL); i += 2) { + const xmlChar *key, *value; + + key = atts[i]; + value = atts[i + 1]; + + if (!value) + continue; + if (!xmlStrncmp ((const xmlChar *) "start", key, 5)) { + sctx->time1 = sctx->time2; + sctx->time2 = atoi ((const char *) value) * 
GST_MSECOND; + sctx->has_result = TRUE; + g_string_append (sctx->resultbuf, sctx->buf->str); + g_string_truncate (sctx->buf, 0); + } + } + } +} + +static void +handle_start_font (GstSamiContext * sctx, const xmlChar ** atts) +{ + int i; + + sami_context_pop_state (sctx, COLOR_TAG); + if (atts != NULL) { + for (i = 0; (atts[i] != NULL); i += 2) { + const xmlChar *key, *value; + + key = atts[i]; + value = atts[i + 1]; + + if (!value) + continue; + if (!xmlStrncmp ((const xmlChar *) "color", key, 5)) { + /* + * Many SAMI files contain invalid color + * values. + * Prefix a bare six-digit hex value with the missing '#'. + */ + gchar *sharp = ""; + int len = xmlStrlen (value); + + if (!(*value == '#' && len == 7)) { + gchar *r; + + strtol ((const char *) value, &r, 16); /* try to parse as hex */ + if (((xmlChar *) r == (value + 6) && len == 6)) { + sharp = "#"; + } + } + g_string_append_printf (sctx->buf, "", sharp, + value); + sami_context_push_state (sctx, COLOR_TAG); + } + } + } +} + +static void +start_sami_element (void *ctx, const xmlChar * name, const xmlChar ** atts) +{ + GstSamiContext *sctx = (GstSamiContext *) ctx; + + if (!xmlStrncmp ((const xmlChar *) "title", name, 5)) { + sctx->in_title = TRUE; + } else if (!xmlStrncmp ((const xmlChar *) "sync", name, 4)) { + handle_start_sync (sctx, atts); + } else if (!xmlStrncmp ((const xmlChar *) "font", name, 4)) { + handle_start_font (sctx, atts); + } else if (!xmlStrncmp ((const xmlChar *) "ruby", name, 4)) { + sami_context_push_state (sctx, RUBY_TAG); + } else if (!xmlStrncmp ((const xmlChar *) "br", name, 2)) { + g_string_append_c (sctx->buf, '\n'); + /* FIXME: support for furigana/ruby once implemented in pango */ + } else if (!xmlStrncmp ((const xmlChar *) "rt", name, 2)) { + if (has_tag (sctx->state, ITALIC_TAG)) { + g_string_append (sctx->rubybuf, ""); + } + g_string_append (sctx->rubybuf, ""); + sami_context_push_state (sctx, RT_TAG); + } else if (!xmlStrncmp ((const xmlChar *) "p", name, 1)) { + } else if 
(!xmlStrncmp ((const xmlChar *) "i", name, 1)) { + g_string_append (sctx->buf, ""); + sami_context_push_state (sctx, ITALIC_TAG); + } +} + +static void +end_sami_element (void *ctx, const xmlChar * name) +{ + GstSamiContext *sctx = (GstSamiContext *) ctx; + + if (!xmlStrncmp ((const xmlChar *) "title", name, 5)) { + sctx->in_title = FALSE; + } else if (!xmlStrncmp ((const xmlChar *) "font", name, 4)) { + sami_context_pop_state (sctx, COLOR_TAG); + } else if (!xmlStrncmp ((const xmlChar *) "ruby", name, 4)) { + sami_context_pop_state (sctx, RUBY_TAG); + } else if (!xmlStrncmp ((const xmlChar *) "i", name, 1)) { + sami_context_pop_state (sctx, ITALIC_TAG); + } +} + +static void +characters_sami (void *ctx, const xmlChar * ch, int len) +{ + GstSamiContext *sctx = (GstSamiContext *) ctx; + + /* skip title */ + if (sctx->in_title) + return; + + if (has_tag (sctx->state, RT_TAG)) { + g_string_append_c (sctx->rubybuf, ' '); + g_string_append_len (sctx->rubybuf, (const gchar *) ch, len); + g_string_append_c (sctx->rubybuf, ' '); + } else { + g_string_append_len (sctx->buf, (const gchar *) ch, len); + } +} + +static xmlSAXHandler samiSAXHandlerStruct = { + NULL, /* internalSubset */ + NULL, /* isStandalone */ + NULL, /* hasInternalSubset */ + NULL, /* hasExternalSubset */ + NULL, /* resolveEntity */ + NULL, /* getEntity */ + NULL, /* entityDecl */ + NULL, /* notationDecl */ + NULL, /* attributeDecl */ + NULL, /* elementDecl */ + NULL, /* unparsedEntityDecl */ + NULL, /* setDocumentLocator */ + NULL, /* startDocument */ + NULL, /* endDocument */ + start_sami_element, /* startElement */ + end_sami_element, /* endElement */ + NULL, /* reference */ + characters_sami, /* characters */ + NULL, /* ignorableWhitespace */ + NULL, /* processingInstruction */ + NULL, /* comment */ + NULL, /* xmlParserWarning */ + NULL, /* xmlParserError */ + NULL, /* xmlParserError */ + NULL, /* getParameterEntity */ + NULL, /* cdataBlock */ + NULL, /* externalSubset */ + 1, /* initialized */ + NULL, 
/* private */ + NULL, /* startElementNsSAX2Func */ + NULL, /* endElementNsSAX2Func */ + NULL /* xmlStructuredErrorFunc */ +}; +static xmlSAXHandlerPtr samiSAXHandler = &samiSAXHandlerStruct; + +void +sami_context_init (ParserState * state) +{ + GstSamiContext *context; + + g_assert (state->user_data == NULL); + state->user_data = (gpointer) g_new0 (GstSamiContext, 1); + context = (GstSamiContext *) state->user_data; + + context->htmlctxt = htmlCreatePushParserCtxt (samiSAXHandler, context, + "", 0, NULL, XML_CHAR_ENCODING_UTF8); + context->buf = g_string_new (""); + context->rubybuf = g_string_new (""); + context->resultbuf = g_string_new (""); + context->state = g_string_new (""); +} + +void +sami_context_deinit (ParserState * state) +{ + GstSamiContext *context = (GstSamiContext *) state->user_data; + + if (context) { + htmlParserCtxtPtr htmlctxt = context->htmlctxt; + + /* destroy sax context */ + htmlDocPtr doc; + + htmlParseChunk (htmlctxt, "", 0, 1); + doc = htmlctxt->myDoc; + htmlFreeParserCtxt (htmlctxt); + context->htmlctxt = NULL; + if (doc) + xmlFreeDoc (doc); + g_string_free (context->buf, TRUE); + g_string_free (context->rubybuf, TRUE); + g_string_free (context->resultbuf, TRUE); + g_string_free (context->state, TRUE); + g_free (context); + state->user_data = NULL; + } +} + +void +sami_context_reset (ParserState * state) +{ + GstSamiContext *context = (GstSamiContext *) state->user_data; + + if (context) { + g_string_truncate (context->buf, 0); + g_string_truncate (context->rubybuf, 0); + g_string_truncate (context->resultbuf, 0); + g_string_truncate (context->state, 0); + context->has_result = FALSE; + context->in_title = FALSE; + context->time1 = 0; + context->time2 = 0; + } +} + + +gchar * +parse_sami (ParserState * state, const gchar * line) +{ + GstSamiContext *context = (GstSamiContext *) state->user_data; + + htmlParseChunk (context->htmlctxt, line, strlen (line), 0); + if (context->has_result) { + gchar *r; + + if (context->rubybuf->len) { + 
context->rubybuf = g_string_append_c (context->rubybuf, '\n'); + g_string_prepend (context->resultbuf, context->rubybuf->str); + context->rubybuf = g_string_truncate (context->rubybuf, 0); + } + + r = g_string_free (context->resultbuf, FALSE); + context->resultbuf = g_string_new (""); + state->start_time = context->time1; + state->duration = context->time2 - context->time1; + context->has_result = FALSE; + return r; + } + return NULL; +} + +#else /* GST_DISABLE_LOADSAVE_REGISTRY */ + +gchar * +parse_sami (ParserState * state, const gchar * line) +{ + /* our template caps should not include sami in this case */ + g_assert_not_reached (); +} + +void +sami_context_init (ParserState * state) +{ + return; +} + +void +sami_context_deinit (ParserState * state) +{ + return; +} + +void +sami_context_reset (ParserState * state) +{ + return; +} + +#endif /* GST_DISABLE_LOADSAVE_REGISTRY */ diff --git a/gst/subparse/samiparse.h b/gst/subparse/samiparse.h new file mode 100644 index 0000000..a45de01 --- /dev/null +++ b/gst/subparse/samiparse.h @@ -0,0 +1,38 @@ +/* GStreamer SAMI subtitle parser + * Copyright (c) 2006 Young-Ho Cha + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ */ + +#ifndef _SAMI_PARSE_H_ +#define _SAMI_PARSE_H_ + +#include "gstsubparse.h" + +G_BEGIN_DECLS + +gchar * parse_sami (ParserState * state, const gchar * line); + +void sami_context_init (ParserState * state); + +void sami_context_deinit (ParserState * state); + +void sami_context_reset (ParserState * state); + +G_END_DECLS + +#endif /* _SAMI_PARSE_H_ */ + -- 2.7.4