1 /* GStreamer SAMI subtitle parser
2 * Copyright (c) 2006 Young-Ho Cha <ganadist at chollian net>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
20 #include "samiparse.h"
22 /* FIXME: use Makefile stuff */
23 #ifndef GST_DISABLE_LOADSAVE_REGISTRY
24 #include <libxml/HTMLparser.h>
27 #define ITALIC_TAG 'i'
33 typedef struct _GstSamiContext GstSamiContext;
35 struct _GstSamiContext
37 GString *buf; /* buffer to collect content */
38 GString *rubybuf; /* buffer to collect ruby content */
39 GString *resultbuf; /* when opening the next 'sync' tag, move
40 * from 'buf' to avoid to append following
42 GString *state; /* in many sami files there are tags that
43 * are not closed, so for each open tag the
44 * parser will append a tag flag here so
45 * that tags can be closed properly on
46 * 'sync' tags. See _context_push_state()
47 * and _context_pop_state(). */
48 htmlParserCtxtPtr htmlctxt; /* html parser context */
49 gboolean has_result; /* set when ready to push out result */
50 gboolean in_title; /* flag to avoid appending the title content
52 guint64 time1; /* previous start attribute in sync tag */
53 guint64 time2; /* current start attribute in sync tag */
57 has_tag (GString * str, const gchar tag)
59 return strrchr (str->str, tag);
63 sami_context_push_state (GstSamiContext * sctx, char state)
65 g_string_append_c (sctx->state, state);
69 sami_context_pop_state (GstSamiContext * sctx, char state)
71 GString *str = g_string_new ("");
72 GString *context_state = sctx->state;
75 for (i = context_state->len - 1; i >= 0; i--) {
76 switch (context_state->str[i]) {
77 case ITALIC_TAG: /* <i> */
79 g_string_append (str, "</i>");
82 case COLOR_TAG: /* <span foreground= > */
84 g_string_append (str, "</span>");
87 case RUBY_TAG: /* <span size= > -- ruby */
91 case RT_TAG: /* ruby */
93 /* FIXME: support for furigana/ruby once implemented in pango */
94 g_string_append (sctx->rubybuf, "</span>");
95 if (has_tag (context_state, ITALIC_TAG)) {
96 g_string_append (sctx->rubybuf, "</i>");
104 if (context_state->str[i] == state) {
105 g_string_append (sctx->buf, str->str);
106 g_string_free (str, TRUE);
107 g_string_truncate (context_state, i);
111 if (state == CLEAR_TAG) {
112 g_string_append (sctx->buf, str->str);
113 g_string_truncate (context_state, 0);
115 g_string_free (str, TRUE);
119 handle_start_sync (GstSamiContext * sctx, const xmlChar ** atts)
123 sami_context_pop_state (sctx, CLEAR_TAG);
125 for (i = 0; (atts[i] != NULL); i += 2) {
126 const xmlChar *key, *value;
133 if (!xmlStrncmp ((const xmlChar *) "start", key, 5)) {
134 sctx->time1 = sctx->time2;
135 sctx->time2 = atoi ((const char *) value) * GST_MSECOND;
136 sctx->has_result = TRUE;
137 g_string_append (sctx->resultbuf, sctx->buf->str);
138 g_string_truncate (sctx->buf, 0);
145 handle_start_font (GstSamiContext * sctx, const xmlChar ** atts)
149 sami_context_pop_state (sctx, COLOR_TAG);
151 for (i = 0; (atts[i] != NULL); i += 2) {
152 const xmlChar *key, *value;
159 if (!xmlStrncmp ((const xmlChar *) "color", key, 5)) {
161 * There are invalid color value in many
163 * It will fix hex color value that start without '#'
166 int len = xmlStrlen (value);
168 if (!(*value == '#' && len == 7)) {
171 strtol ((const char *) value, &r, 16); /* trying onvert hex */
172 if (((xmlChar *) r == (value + 6) && len == 6)) {
176 /* silver colour can be found in many sami files, but X RGB database
177 * doesn't contain a colour by this name, so map explicitly */
178 if (!xmlStrncmp ((const xmlChar *) "silver", value, 6)) {
179 value = (const xmlChar *) "#c0c0c0";
181 g_string_append_printf (sctx->buf, "<span foreground=\"%s%s\">", sharp,
183 sami_context_push_state (sctx, COLOR_TAG);
190 start_sami_element (void *ctx, const xmlChar * name, const xmlChar ** atts)
192 GstSamiContext *sctx = (GstSamiContext *) ctx;
194 if (!xmlStrncmp ((const xmlChar *) "title", name, 5)) {
195 sctx->in_title = TRUE;
196 } else if (!xmlStrncmp ((const xmlChar *) "sync", name, 4)) {
197 handle_start_sync (sctx, atts);
198 } else if (!xmlStrncmp ((const xmlChar *) "font", name, 4)) {
199 handle_start_font (sctx, atts);
200 } else if (!xmlStrncmp ((const xmlChar *) "ruby", name, 4)) {
201 sami_context_push_state (sctx, RUBY_TAG);
202 } else if (!xmlStrncmp ((const xmlChar *) "br", name, 2)) {
203 g_string_append_c (sctx->buf, '\n');
204 /* FIXME: support for furigana/ruby once implemented in pango */
205 } else if (!xmlStrncmp ((const xmlChar *) "rt", name, 2)) {
206 if (has_tag (sctx->state, ITALIC_TAG)) {
207 g_string_append (sctx->rubybuf, "<i>");
209 g_string_append (sctx->rubybuf, "<span size='xx-small' rise='-100'>");
210 sami_context_push_state (sctx, RT_TAG);
211 } else if (!xmlStrncmp ((const xmlChar *) "p", name, 1)) {
212 } else if (!xmlStrncmp ((const xmlChar *) "i", name, 1)) {
213 g_string_append (sctx->buf, "<i>");
214 sami_context_push_state (sctx, ITALIC_TAG);
219 end_sami_element (void *ctx, const xmlChar * name)
221 GstSamiContext *sctx = (GstSamiContext *) ctx;
223 if (!xmlStrncmp ((const xmlChar *) "title", name, 5)) {
224 sctx->in_title = FALSE;
225 } else if (!xmlStrncmp ((const xmlChar *) "font", name, 4)) {
226 sami_context_pop_state (sctx, COLOR_TAG);
227 } else if (!xmlStrncmp ((const xmlChar *) "ruby", name, 4)) {
228 sami_context_pop_state (sctx, RUBY_TAG);
229 } else if (!xmlStrncmp ((const xmlChar *) "i", name, 1)) {
230 sami_context_pop_state (sctx, ITALIC_TAG);
235 characters_sami (void *ctx, const xmlChar * ch, int len)
237 GstSamiContext *sctx = (GstSamiContext *) ctx;
243 if (has_tag (sctx->state, RT_TAG)) {
244 g_string_append_c (sctx->rubybuf, ' ');
245 g_string_append_len (sctx->rubybuf, (const gchar *) ch, len);
246 g_string_append_c (sctx->rubybuf, ' ');
248 g_string_append_len (sctx->buf, (const gchar *) ch, len);
252 static xmlSAXHandler samiSAXHandlerStruct = {
253 NULL, /* internalSubset */
254 NULL, /* isStandalone */
255 NULL, /* hasInternalSubset */
256 NULL, /* hasExternalSubset */
257 NULL, /* resolveEntity */
258 NULL, /* getEntity */
259 NULL, /* entityDecl */
260 NULL, /* notationDecl */
261 NULL, /* attributeDecl */
262 NULL, /* elementDecl */
263 NULL, /* unparsedEntityDecl */
264 NULL, /* setDocumentLocator */
265 NULL, /* startDocument */
266 NULL, /* endDocument */
267 start_sami_element, /* startElement */
268 end_sami_element, /* endElement */
269 NULL, /* reference */
270 characters_sami, /* characters */
271 NULL, /* ignorableWhitespace */
272 NULL, /* processingInstruction */
274 NULL, /* xmlParserWarning */
275 NULL, /* xmlParserError */
276 NULL, /* xmlParserError */
277 NULL, /* getParameterEntity */
278 NULL, /* cdataBlock */
279 NULL, /* externalSubset */
282 NULL, /* startElementNsSAX2Func */
283 NULL, /* endElementNsSAX2Func */
284 NULL /* xmlStructuredErrorFunc */
286 static xmlSAXHandlerPtr samiSAXHandler = &samiSAXHandlerStruct;
289 sami_context_init (ParserState * state)
291 GstSamiContext *context;
293 g_assert (state->user_data == NULL);
294 state->user_data = (gpointer) g_new0 (GstSamiContext, 1);
295 context = (GstSamiContext *) state->user_data;
297 context->htmlctxt = htmlCreatePushParserCtxt (samiSAXHandler, context,
298 "", 0, NULL, XML_CHAR_ENCODING_UTF8);
299 context->buf = g_string_new ("");
300 context->rubybuf = g_string_new ("");
301 context->resultbuf = g_string_new ("");
302 context->state = g_string_new ("");
306 sami_context_deinit (ParserState * state)
308 GstSamiContext *context = (GstSamiContext *) state->user_data;
311 htmlParserCtxtPtr htmlctxt = context->htmlctxt;
313 /* destroy sax context */
316 htmlParseChunk (htmlctxt, "", 0, 1);
317 doc = htmlctxt->myDoc;
318 htmlFreeParserCtxt (htmlctxt);
319 context->htmlctxt = NULL;
322 g_string_free (context->buf, TRUE);
323 g_string_free (context->rubybuf, TRUE);
324 g_string_free (context->resultbuf, TRUE);
325 g_string_free (context->state, TRUE);
327 state->user_data = NULL;
332 sami_context_reset (ParserState * state)
334 GstSamiContext *context = (GstSamiContext *) state->user_data;
337 g_string_truncate (context->buf, 0);
338 g_string_truncate (context->rubybuf, 0);
339 g_string_truncate (context->resultbuf, 0);
340 g_string_truncate (context->state, 0);
341 context->has_result = FALSE;
342 context->in_title = FALSE;
350 parse_sami (ParserState * state, const gchar * line)
352 GstSamiContext *context = (GstSamiContext *) state->user_data;
354 htmlParseChunk (context->htmlctxt, line, strlen (line), 0);
355 if (context->has_result) {
358 if (context->rubybuf->len) {
359 context->rubybuf = g_string_append_c (context->rubybuf, '\n');
360 g_string_prepend (context->resultbuf, context->rubybuf->str);
361 context->rubybuf = g_string_truncate (context->rubybuf, 0);
364 r = g_string_free (context->resultbuf, FALSE);
365 context->resultbuf = g_string_new ("");
366 state->start_time = context->time1;
367 state->duration = context->time2 - context->time1;
368 context->has_result = FALSE;
374 #else /* GST_DISABLE_LOADSAVE_REGISTRY */
377 parse_sami (ParserState * state, const gchar * line)
379 /* our template caps should not include sami in this case */
380 g_assert_not_reached ();
384 sami_context_init (ParserState * state)
390 sami_context_deinit (ParserState * state)
396 sami_context_reset (ParserState * state)
401 #endif /* GST_DISABLE_LOADSAVE_REGISTRY */