1 /* GStreamer SAMI subtitle parser
2 * Copyright (c) 2006 Young-Ho Cha <ganadist at chollian net>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
20 #include "samiparse.h"
22 /* FIXME: use Makefile stuff */
23 #ifndef GST_DISABLE_LOADSAVE_REGISTRY
24 #include <libxml/HTMLparser.h>
27 #define ITALIC_TAG 'i'
33 typedef struct _GstSamiContext GstSamiContext;
35 struct _GstSamiContext
37 GString *buf; /* buffer to collect content */
38 GString *rubybuf; /* buffer to collect ruby content */
39 GString *resultbuf; /* when opening the next 'sync' tag, move
40 * from 'buf' to avoid to append following
42 GString *state; /* in many sami files there are tags that
43 * are not closed, so for each open tag the
44 * parser will append a tag flag here so
45 * that tags can be closed properly on
46 * 'sync' tags. See _context_push_state()
47 * and _context_pop_state(). */
48 htmlParserCtxtPtr htmlctxt; /* html parser context */
49 gboolean has_result; /* set when ready to push out result */
50 gboolean in_title; /* flag to avoid appending the title content
52 guint64 time1; /* previous start attribute in sync tag */
53 guint64 time2; /* current start attribute in sync tag */
57 has_tag (GString * str, const gchar tag)
59 return strrchr (str->str, tag);
63 sami_context_push_state (GstSamiContext * sctx, char state)
65 g_string_append_c (sctx->state, state);
69 sami_context_pop_state (GstSamiContext * sctx, char state)
71 GString *str = g_string_new ("");
72 GString *context_state = sctx->state;
75 for (i = context_state->len - 1; i >= 0; i--) {
76 switch (context_state->str[i]) {
77 case ITALIC_TAG: /* <i> */
79 g_string_append (str, "</i>");
82 case COLOR_TAG: /* <span foreground= > */
84 g_string_append (str, "</span>");
87 case RUBY_TAG: /* <span size= > -- ruby */
91 case RT_TAG: /* ruby */
93 /* FIXME: support for furigana/ruby once implemented in pango */
94 g_string_append (sctx->rubybuf, "</span>");
95 if (has_tag (context_state, ITALIC_TAG)) {
96 g_string_append (sctx->rubybuf, "</i>");
104 if (context_state->str[i] == state) {
105 g_string_append (sctx->buf, str->str);
106 g_string_free (str, TRUE);
107 g_string_truncate (context_state, i);
111 if (state == CLEAR_TAG) {
112 g_string_append (sctx->buf, str->str);
113 g_string_truncate (context_state, 0);
115 g_string_free (str, TRUE);
119 handle_start_sync (GstSamiContext * sctx, const xmlChar ** atts)
123 sami_context_pop_state (sctx, CLEAR_TAG);
125 for (i = 0; (atts[i] != NULL); i += 2) {
126 const xmlChar *key, *value;
133 if (!xmlStrncmp ((const xmlChar *) "start", key, 5)) {
134 sctx->time1 = sctx->time2;
135 sctx->time2 = atoi ((const char *) value) * GST_MSECOND;
136 sctx->has_result = TRUE;
137 g_string_append (sctx->resultbuf, sctx->buf->str);
138 g_string_truncate (sctx->buf, 0);
145 handle_start_font (GstSamiContext * sctx, const xmlChar ** atts)
149 sami_context_pop_state (sctx, COLOR_TAG);
151 for (i = 0; (atts[i] != NULL); i += 2) {
152 const xmlChar *key, *value;
159 if (!xmlStrncmp ((const xmlChar *) "color", key, 5)) {
161 * There are invalid color value in many
163 * It will fix hex color value that start without '#'
166 int len = xmlStrlen (value);
168 if (!(*value == '#' && len == 7)) {
171 strtol ((const char *) value, &r, 16); /* trying onvert hex */
172 if (((xmlChar *) r == (value + 6) && len == 6)) {
176 /* silver colour can be found in many sami files, but X RGB database
177 * doesn't contain a colour by this name, so map explicitly */
178 if (!xmlStrncmp ((const xmlChar *) "silver", value, 6)) {
179 value = (const xmlChar *) "#c0c0c0";
181 g_string_append_printf (sctx->buf, "<span foreground=\"%s%s\">", sharp,
183 sami_context_push_state (sctx, COLOR_TAG);
190 start_sami_element (void *ctx, const xmlChar * name, const xmlChar ** atts)
192 GstSamiContext *sctx = (GstSamiContext *) ctx;
194 if (!xmlStrncmp ((const xmlChar *) "title", name, 5)) {
195 sctx->in_title = TRUE;
196 } else if (!xmlStrncmp ((const xmlChar *) "sync", name, 4)) {
197 handle_start_sync (sctx, atts);
198 } else if (!xmlStrncmp ((const xmlChar *) "font", name, 4)) {
199 handle_start_font (sctx, atts);
200 } else if (!xmlStrncmp ((const xmlChar *) "ruby", name, 4)) {
201 sami_context_push_state (sctx, RUBY_TAG);
202 } else if (!xmlStrncmp ((const xmlChar *) "br", name, 2)) {
203 g_string_append_c (sctx->buf, '\n');
204 /* FIXME: support for furigana/ruby once implemented in pango */
205 } else if (!xmlStrncmp ((const xmlChar *) "rt", name, 2)) {
206 if (has_tag (sctx->state, ITALIC_TAG)) {
207 g_string_append (sctx->rubybuf, "<i>");
209 g_string_append (sctx->rubybuf, "<span size='xx-small' rise='-100'>");
210 sami_context_push_state (sctx, RT_TAG);
211 } else if (!xmlStrncmp ((const xmlChar *) "p", name, 1)) {
212 } else if (!xmlStrncmp ((const xmlChar *) "i", name, 1)) {
213 g_string_append (sctx->buf, "<i>");
214 sami_context_push_state (sctx, ITALIC_TAG);
219 end_sami_element (void *ctx, const xmlChar * name)
221 GstSamiContext *sctx = (GstSamiContext *) ctx;
223 if (!xmlStrncmp ((const xmlChar *) "title", name, 5)) {
224 sctx->in_title = FALSE;
225 } else if (!xmlStrncmp ((const xmlChar *) "font", name, 4)) {
226 sami_context_pop_state (sctx, COLOR_TAG);
227 } else if (!xmlStrncmp ((const xmlChar *) "ruby", name, 4)) {
228 sami_context_pop_state (sctx, RUBY_TAG);
229 } else if (!xmlStrncmp ((const xmlChar *) "i", name, 1)) {
230 sami_context_pop_state (sctx, ITALIC_TAG);
235 characters_sami (void *ctx, const xmlChar * ch, int len)
237 GstSamiContext *sctx = (GstSamiContext *) ctx;
244 escaped = g_markup_escape_text ((const gchar *) ch, len);
245 if (has_tag (sctx->state, RT_TAG)) {
246 g_string_append_c (sctx->rubybuf, ' ');
247 g_string_append (sctx->rubybuf, escaped);
248 g_string_append_c (sctx->rubybuf, ' ');
250 g_string_append (sctx->buf, escaped);
255 static xmlSAXHandler samiSAXHandlerStruct = {
256 NULL, /* internalSubset */
257 NULL, /* isStandalone */
258 NULL, /* hasInternalSubset */
259 NULL, /* hasExternalSubset */
260 NULL, /* resolveEntity */
261 NULL, /* getEntity */
262 NULL, /* entityDecl */
263 NULL, /* notationDecl */
264 NULL, /* attributeDecl */
265 NULL, /* elementDecl */
266 NULL, /* unparsedEntityDecl */
267 NULL, /* setDocumentLocator */
268 NULL, /* startDocument */
269 NULL, /* endDocument */
270 start_sami_element, /* startElement */
271 end_sami_element, /* endElement */
272 NULL, /* reference */
273 characters_sami, /* characters */
274 NULL, /* ignorableWhitespace */
275 NULL, /* processingInstruction */
277 NULL, /* xmlParserWarning */
278 NULL, /* xmlParserError */
279 NULL, /* xmlParserError */
280 NULL, /* getParameterEntity */
281 NULL, /* cdataBlock */
282 NULL, /* externalSubset */
285 NULL, /* startElementNsSAX2Func */
286 NULL, /* endElementNsSAX2Func */
287 NULL /* xmlStructuredErrorFunc */
289 static xmlSAXHandlerPtr samiSAXHandler = &samiSAXHandlerStruct;
292 sami_context_init (ParserState * state)
294 GstSamiContext *context;
296 g_assert (state->user_data == NULL);
297 state->user_data = (gpointer) g_new0 (GstSamiContext, 1);
298 context = (GstSamiContext *) state->user_data;
300 context->htmlctxt = htmlCreatePushParserCtxt (samiSAXHandler, context,
301 "", 0, NULL, XML_CHAR_ENCODING_UTF8);
302 context->buf = g_string_new ("");
303 context->rubybuf = g_string_new ("");
304 context->resultbuf = g_string_new ("");
305 context->state = g_string_new ("");
309 sami_context_deinit (ParserState * state)
311 GstSamiContext *context = (GstSamiContext *) state->user_data;
314 htmlParserCtxtPtr htmlctxt = context->htmlctxt;
316 /* destroy sax context */
319 htmlParseChunk (htmlctxt, "", 0, 1);
320 doc = htmlctxt->myDoc;
321 htmlFreeParserCtxt (htmlctxt);
322 context->htmlctxt = NULL;
325 g_string_free (context->buf, TRUE);
326 g_string_free (context->rubybuf, TRUE);
327 g_string_free (context->resultbuf, TRUE);
328 g_string_free (context->state, TRUE);
330 state->user_data = NULL;
335 sami_context_reset (ParserState * state)
337 GstSamiContext *context = (GstSamiContext *) state->user_data;
340 g_string_truncate (context->buf, 0);
341 g_string_truncate (context->rubybuf, 0);
342 g_string_truncate (context->resultbuf, 0);
343 g_string_truncate (context->state, 0);
344 context->has_result = FALSE;
345 context->in_title = FALSE;
353 parse_sami (ParserState * state, const gchar * line)
355 GstSamiContext *context = (GstSamiContext *) state->user_data;
357 htmlParseChunk (context->htmlctxt, line, strlen (line), 0);
358 if (context->has_result) {
361 if (context->rubybuf->len) {
362 context->rubybuf = g_string_append_c (context->rubybuf, '\n');
363 g_string_prepend (context->resultbuf, context->rubybuf->str);
364 context->rubybuf = g_string_truncate (context->rubybuf, 0);
367 r = g_string_free (context->resultbuf, FALSE);
368 context->resultbuf = g_string_new ("");
369 state->start_time = context->time1;
370 state->duration = context->time2 - context->time1;
371 context->has_result = FALSE;
377 #else /* GST_DISABLE_LOADSAVE_REGISTRY */
380 parse_sami (ParserState * state, const gchar * line)
382 /* our template caps should not include sami in this case */
383 g_assert_not_reached ();
387 sami_context_init (ParserState * state)
393 sami_context_deinit (ParserState * state)
399 sami_context_reset (ParserState * state)
404 #endif /* GST_DISABLE_LOADSAVE_REGISTRY */