1 /* GStreamer SAMI subtitle parser
2 * Copyright (c) 2006, 2013 Young-Ho Cha <ganadist at gmail com>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
21 #include "samiparse.h"
27 #define ITALIC_TAG 'i'
33 typedef struct _HtmlParser HtmlParser;
34 typedef struct _HtmlContext HtmlContext;
35 typedef struct _GstSamiContext GstSamiContext;
36 #ifdef SUBPARSE_MODIFICATION
37 typedef struct _LanguageStruct GstLangStruct;
38 struct _LanguageStruct
43 #define MAX_LANGUAGE 10
45 struct _GstSamiContext
47 GString *buf; /* buffer to collect content */
48 GString *rubybuf; /* buffer to collect ruby content */
49 GString *resultbuf; /* when opening the next 'sync' tag, move
50 * from 'buf' to avoid to append following
52 GString *state; /* in many sami files there are tags that
53 * are not closed, so for each open tag the
54 * parser will append a tag flag here so
55 * that tags can be closed properly on
56 * 'sync' tags. See _context_push_state()
57 * and _context_pop_state(). */
58 HtmlContext *htmlctxt; /* html parser context */
59 gboolean has_result; /* set when ready to push out result */
60 gboolean in_sync; /* flag to avoid appending anything except the
61 * content of the sync elements to buf */
62 guint64 time1; /* previous start attribute in sync tag */
63 guint64 time2; /* current start attribute in sync tag */
64 #ifdef SUBPARSE_MODIFICATION
65 guint64 time3; /* To store the last current time when language is changed */
66 GList *lang_list; /* Language list for an external subtitle file */
67 gchar *current_language; /* Current language parsed */
68 gchar *desired_language; /* Language set by user */
69 gboolean language_changed; /* language changed signal */
70 gboolean end_body; /* </BODY> reached */
76 void (*start_element) (HtmlContext * ctx,
77 const gchar * name, const gchar ** attr, gpointer user_data);
78 void (*end_element) (HtmlContext * ctx,
79 const gchar * name, gpointer user_data);
80 void (*text) (HtmlContext * ctx,
81 const gchar * text, gsize text_len, gpointer user_data);
86 const HtmlParser *parser;
/* Create a parsing context: a zero-filled HtmlContext that stores the
 * callback table `parser`, the opaque `user_data` handed back to every
 * callback, and an empty GString used to accumulate unparsed input. */
92 html_context_new (HtmlParser * parser, gpointer user_data)
94 HtmlContext *ctxt = (HtmlContext *) g_new0 (HtmlContext, 1);
95 ctxt->parser = parser;
96 ctxt->user_data = user_data;
/* g_string_new (NULL) yields an empty string, not a NULL GString. */
97 ctxt->buf = g_string_new (NULL);
/* Release the accumulation buffer of `ctxt`; passing TRUE also frees the
 * buffer's character data. */
102 html_context_free (HtmlContext * ctxt)
104 g_string_free (ctxt->buf, TRUE);
110 const gunichar unescaped;
111 const gchar *escaped;
114 struct EntityMap XmlEntities[] = {
123 struct EntityMap HtmlEntities[] = {
124 /* nbsp will handle manually
/* Build a new string from `text` in which character entities and runs of
 * whitespace are rewritten. The character data of the accumulated GString
 * is returned (g_string_free with FALSE), so the caller owns the result. */
377 unescape_string (const gchar * text)
380 GString *unescaped = g_string_new (NULL);
386 /* "nbsp" (matched case-insensitively) becomes U+00A0, code point 160 */
387 if (!g_ascii_strncasecmp (text, "nbsp", 4)) {
388 unescaped = g_string_append_unichar (unescaped, 160);
396 /* pass xml entities. these will be processed as pango markup */
397 for (i = 0; XmlEntities[i].escaped; i++) {
398 gssize len = strlen (XmlEntities[i].escaped);
399 if (!g_ascii_strncasecmp (text, XmlEntities[i].escaped, len)) {
/* Re-emit the entity with its leading '&' so that it stays escaped. */
400 unescaped = g_string_append_c (unescaped, '&');
402 g_string_append_len (unescaped, XmlEntities[i].escaped, len);
408 /* convert html entities */
409 for (i = 0; HtmlEntities[i].escaped; i++) {
410 gssize len = strlen (HtmlEntities[i].escaped);
/* Unlike the XML table above, this comparison is case-sensitive. */
411 if (!strncmp (text, HtmlEntities[i].escaped, len)) {
413 g_string_append_unichar (unescaped, HtmlEntities[i].unescaped);
420 gboolean is_hex = FALSE;
/* Numeric character reference: parsed as base 16 or base 10. */
431 l = strtoul (text, &end, 16);
433 l = strtoul (text, &end, 10);
/* NOTE(review): the errno test is only meaningful if errno was cleared
 * before strtoul; that reset is not visible here - confirm. */
436 if (text == end || errno != 0) {
437 /* error occurred. pass it */
/* NOTE(review): no range check on `l` is visible before it is appended as
 * a Unicode code point - confirm invalid values cannot reach here. */
440 unescaped = g_string_append_unichar (unescaped, l);
450 unescaped = g_string_append (unescaped, "&");
455 } else if (g_ascii_isspace (*text)) {
/* One space is emitted for the whole run of ASCII whitespace. */
456 unescaped = g_string_append_c (unescaped, ' ');
457 /* strip whitespace */
460 } while ((*text) && g_ascii_isspace (*text));
462 unescaped = g_string_append_c (unescaped, *text);
467 return g_string_free (unescaped, FALSE);
/* Locate the first occurrence of `delimiter` in `string` and store a newly
 * allocated copy of a token in `*first`: either the text preceding the
 * delimiter or a copy of the whole string (presumably when the delimiter
 * is absent; the selecting condition is not visible here). */
471 string_token (const gchar * string, const gchar * delimiter, gchar ** first)
473 gchar *next = strstr (string, delimiter);
475 *first = g_strndup (string, next - string);
477 *first = g_strdup (string);
/* Split the tag text `string` into an element name and an attribute array
 * of alternating name/value strings, then invoke the parser's
 * start_element callback with them; end_element is invoked as well
 * (presumably only when `must_close` is set; the condition is not visible
 * here). With SUBPARSE_MODIFICATION a comment of the form "!--P" is
 * additionally scanned for "lang:" entries. */
483 html_context_handle_element (HtmlContext * ctxt,
484 const gchar * string, gboolean must_close)
489 const gchar *found, *next;
490 #ifdef SUBPARSE_MODIFICATION
491 const gchar *name_temp = NULL;
494 /* split element name and attributes */
495 next = string_token (string, " ", &name);
498 /* count attributes */
/* Every '=' in the remaining text counts as one attribute. */
501 found = strchr (found, '=');
/* (count + 1) * 2 zeroed slots leave room for a terminating NULL pair. */
511 attrs = g_new0 (gchar *, (count + 1) * 2);
/* NOTE(review): i is bounded by count yet advances by 2 and indexes
 * attrs[i] / attrs[i + 1], so only about half of the counted attributes
 * are parsed - confirm whether the bound should be count * 2. */
513 for (i = 0; i < count; i += 2) {
514 gchar *attr_name = NULL, *attr_value = NULL;
517 #ifdef SUBPARSE_MODIFICATION
518 /* sometimes count can unnecessarily be high value, because of unrequired "=" in subtitle file.
519 * In that case it should not crash */
524 next = string_token (next + 1, "=", &attr_name);
526 #ifdef SUBPARSE_MODIFICATION
527 /* sometimes count can unnecessarily be high value, because of unrequired "=" in subtitle file.
528 * In that case it should not crash */
533 next = string_token (next + 1, " ", &attr_value);
535 /* strip " or ' from attribute value */
536 if (attr_value[0] == '"' || attr_value[0] == '\'') {
537 gchar *tmp = g_strdup (attr_value + 1);
/* NOTE(review): for an empty attr_value, length is 0 and length - 1
 * indexes before the buffer - confirm an emptiness check exists. */
542 length = strlen (attr_value);
543 if (attr_value[length - 1] == '"' || attr_value[length - 1] == '\'') {
544 attr_value[length - 1] = '\0';
547 attrs[i] = attr_name;
548 attrs[i + 1] = attr_value;
550 #ifdef SUBPARSE_MODIFICATION
551 /* sometimes spaces can be there in between !-- and P
552 * that also we have to take care */
553 if (!g_ascii_strcasecmp("!--", name)) {
554 gchar* tempchar = (gchar*)(string + 3);
555 while (*tempchar == ' ') {
557 if (*tempchar == 'P' || *tempchar == 'p') {
/* NOTE(review): name compares equal to "!--" here, so index 3 holds its
 * terminating NUL; confirm the buffer has room for a new terminator at
 * index 4 after this write. */
558 *(name + 3) = *tempchar;
565 if (next && (!g_ascii_strcasecmp("!--P", name))) {
568 /* count attributes */
571 found = (gchar*)strcasestr (found, "lang");
/* NOTE(review): unlike the (count + 1) * 2 allocation above, this array
 * has no spare NULL slot once count pairs are stored, while
 * handle_start_language_list () scans until it meets a NULL entry. */
579 attrs = g_new0 (gchar *, count * 2);
581 for (i = 0; i < count; i++) {
582 gchar *attr_name = NULL, *attr_value = NULL;
/* NOTE(review): no NULL check is visible for the strcasestr () result or
 * for the malloc () below before they are used - confirm. */
584 next = (gchar*)strcasestr (next, "lang:");
585 attr_value = (gchar*)malloc (3);
/* The language value is taken to be exactly two characters long. */
587 strncpy (attr_value, next, 2);
588 attr_value[2] = '\0';
589 GST_LOG ("Language value comes as %s", attr_value);
592 if (*name_temp == '{') {
593 int character_count = 0;
598 if (*name_temp == '.') {
599 attr_name = (gchar*) malloc (character_count + 1);
602 else if (*name_temp != ' ')
/* NOTE(review): the copy stops only at a space; no test for the end of
 * the string or for character_count is visible - confirm. */
610 for (j = 0; *(name_temp + j) != ' '; j++) {
611 attr_name[j] = *(name_temp + j);
614 attrs[attrindex++] = attr_name;
615 attrs[attrindex++] = attr_value;
621 ctxt->parser->start_element (ctxt, name,
622 (const gchar **) attrs, ctxt->user_data);
624 ctxt->parser->end_element (ctxt, name, ctxt->user_data);
/* Append `text_len` bytes of `text` to the context buffer and consume the
 * buffer from the front: a leading '<' starts a tag that is dispatched to
 * the element handlers, anything else is stripped of surrounding
 * whitespace and passed to the text callback. Input that cannot be
 * consumed yet is kept in ctxt->buf. */
631 html_context_parse (HtmlContext * ctxt, gchar * text, gsize text_len)
633 const gchar *next = NULL;
634 ctxt->buf = g_string_append_len (ctxt->buf, text, text_len);
635 next = ctxt->buf->str;
637 if (next[0] == '<') {
638 gchar *element = NULL;
639 /* find <blahblah> */
640 if (!strchr (next, '>')) {
641 /* no tag end point. the buffer will be processed on the next call */
645 next = string_token (next, ">", &element);
/* NOTE(review): the suffix test looks at `next` (the remaining input)
 * although the character removed below is the last one of `element`;
 * confirm whether `element` was intended. */
647 if (g_str_has_suffix (next, "/")) {
649 element[strlen (element) - 1] = '\0';
/* element + 1 skips the leading '<'. */
650 html_context_handle_element (ctxt, element + 1, TRUE);
651 } else if (element[1] == '/') {
/* element + 2 skips the leading "</" of a closing tag. */
653 ctxt->parser->end_element (ctxt, element + 2, ctxt->user_data);
656 html_context_handle_element (ctxt, element + 1, FALSE);
659 } else if (strchr (next, '<')) {
/* Text followed by another tag: hand over the part before the '<'. */
662 next = string_token (next, "<", &text);
663 text = g_strstrip (text);
664 length = strlen (text);
665 ctxt->parser->text (ctxt, text, length, ctxt->user_data);
/* No further tag: the rest of the buffer is text. g_strstrip works in
 * place, i.e. directly on the memory `next` points to. */
669 gchar *text = (gchar *) next;
671 text = g_strstrip (text);
672 length = strlen (text);
673 ctxt->parser->text (ctxt, text, length, ctxt->user_data);
674 ctxt->buf = g_string_assign (ctxt->buf, "");
/* Keep the unconsumed tail for the next call. */
679 ctxt->buf = g_string_assign (ctxt->buf, next);
/* Return a pointer to the last occurrence of `tag` in the character data
 * of `str`, or NULL when it does not occur (strrchr semantics). Callers in
 * this file use the result as a truth value. */
683 has_tag (GString * str, const gchar tag)
685 return strrchr (str->str, tag);
/* Record an opened tag by appending its flag character `state` to
 * sctx->state. */
689 sami_context_push_state (GstSamiContext * sctx, char state)
691 GST_LOG ("state %c", state);
692 g_string_append_c (sctx->state, state);
/* Walk the open-tag flags in sctx->state from the most recent one
 * backwards and collect the matching closing markup. When the flag `state`
 * is met, the collected markup is appended to sctx->buf and the state
 * string is cut back to that position. When `state` is CLEAR_TAG, all
 * collected markup is appended and the state string is emptied. */
696 sami_context_pop_state (GstSamiContext * sctx, char state)
698 GString *str = g_string_new ("");
699 GString *context_state = sctx->state;
702 GST_LOG ("state %c", state);
/* NOTE(review): i must have a signed type for the i >= 0 test to end the
 * loop; its declaration is not visible here - confirm. */
703 for (i = context_state->len - 1; i >= 0; i--) {
704 switch (context_state->str[i]) {
705 case ITALIC_TAG: /* <i> */
707 g_string_append (str, "</i>");
710 case SPAN_TAG: /* <span foreground= > */
712 g_string_append (str, "</span>");
715 case RUBY_TAG: /* <span size= > -- ruby */
719 case RT_TAG: /* ruby */
721 /* FIXME: support for furigana/ruby once implemented in pango */
/* Ruby text is closed in rubybuf rather than in the main buffer. */
722 g_string_append (sctx->rubybuf, "</span>");
723 if (has_tag (context_state, ITALIC_TAG)) {
724 g_string_append (sctx->rubybuf, "</i>");
/* Requested flag found: emit what was collected and drop this flag and
 * everything pushed after it. */
732 if (context_state->str[i] == state) {
733 g_string_append (sctx->buf, str->str);
734 g_string_free (str, TRUE);
735 g_string_truncate (context_state, i);
739 if (state == CLEAR_TAG) {
740 g_string_append (sctx->buf, str->str);
741 g_string_truncate (context_state, 0);
743 g_string_free (str, TRUE);
/* Handle an opening sync tag: close every tag that is still open, then,
 * for a "start" attribute, shift time2 into time1 (only when no result
 * text is pending), read the new start value scaled by GST_MSECOND into
 * time2, and move the text collected in buf over to resultbuf. */
747 handle_start_sync (GstSamiContext * sctx, const gchar ** atts)
751 sami_context_pop_state (sctx, CLEAR_TAG);
/* atts holds alternating key/value entries and ends with NULL. */
753 for (i = 0; (atts[i] != NULL); i += 2) {
754 const gchar *key, *value;
761 if (!g_ascii_strcasecmp ("start", key)) {
762 /* Only set a new start time if we don't have text pending */
763 if (sctx->resultbuf->len == 0)
764 sctx->time1 = sctx->time2;
/* NOTE(review): atoi () reports no errors and returns int; confirm that
 * malformed, negative or very large start values are acceptable here. */
766 sctx->time2 = atoi ((const char *) value) * GST_MSECOND;
767 #ifdef SUBPARSE_MODIFICATION
768 sctx->time3 = sctx->time2;
/* Never let the new time fall before the previous one. */
770 sctx->time2 = MAX (sctx->time2, sctx->time1);
771 g_string_append (sctx->resultbuf, sctx->buf->str);
772 sctx->has_result = (sctx->resultbuf->len != 0) ? TRUE : FALSE;
773 g_string_truncate (sctx->buf, 0);
/* Handle an opening font tag: close any span that is still open, emit a
 * "<span" element into buf whose attributes are derived from the "color"
 * and "face" attributes, and record SPAN_TAG as open. */
780 handle_start_font (GstSamiContext * sctx, const gchar ** atts)
784 sami_context_pop_state (sctx, SPAN_TAG);
786 g_string_append (sctx->buf, "<span");
787 for (i = 0; (atts[i] != NULL); i += 2) {
788 const gchar *key, *value;
795 if (!g_ascii_strcasecmp ("color", key)) {
797 * There are invalid color value in many
799 * It will fix hex color value that start without '#'
801 const gchar *sharp = "";
802 int len = strlen (value);
/* Anything that is not already '#' followed by six characters is
 * inspected further. */
804 if (!(*value == '#' && len == 7)) {
807 /* check if it looks like hex */
/* Accepted only when all six characters were consumed as hex digits. */
808 if (strtol ((const char *) value, &r, 16) >= 0 &&
809 ((gchar *) r == (value + 6) && len == 6)) {
813 /* some colours can be found in many sami files, but X RGB database
814 * doesn't contain a colour by this name, so map explicitly */
815 if (!g_ascii_strcasecmp ("aqua", value)) {
817 } else if (!g_ascii_strcasecmp ("crimson", value)) {
819 } else if (!g_ascii_strcasecmp ("fuchsia", value)) {
821 } else if (!g_ascii_strcasecmp ("indigo", value)) {
823 } else if (!g_ascii_strcasecmp ("lime", value)) {
825 } else if (!g_ascii_strcasecmp ("olive", value)) {
827 } else if (!g_ascii_strcasecmp ("silver", value)) {
829 } else if (!g_ascii_strcasecmp ("teal", value)) {
832 g_string_append_printf (sctx->buf, " foreground=\"%s%s\"", sharp,
834 } else if (!g_ascii_strcasecmp ("face", key)) {
/* NOTE(review): the attribute value is written into the markup as-is;
 * confirm that quote or markup characters cannot reach this point. */
835 g_string_append_printf (sctx->buf, " font_family=\"%s\"", value);
838 g_string_append_c (sctx->buf, '>');
839 sami_context_push_state (sctx, SPAN_TAG);
843 #ifdef SUBPARSE_MODIFICATION
/* Handle an opening p tag: track the language named by its "class"
 * attribute in sctx->current_language, flag a language change, and adopt
 * the first language seen as desired_language when none has been set. */
845 handle_p (GstSamiContext * sctx, const gchar ** atts)
850 for (i = 0; (atts[i] != NULL); i += 2) {
851 const gchar *key, *value;
/* A different language at an unchanged timestamp counts as a change. */
856 if (sctx->current_language && value && strcmp(sctx->current_language, value)
857 && (sctx->time1 == sctx->time2))
858 sctx->language_changed = TRUE;
860 else if (!sctx->current_language)
861 sctx->current_language = (gchar*) malloc (128);
863 if (key && !g_ascii_strcasecmp ("class", key) && value) {
/* NOTE(review): current_language is a fixed 128-byte buffer (see the
 * malloc above) and strcpy () does not bound the copy; a class value of
 * 128 characters or more overflows it. The malloc result is also used
 * without a visible NULL check - confirm. */
864 strcpy (sctx->current_language, value);
865 if (sctx->desired_language == NULL && key) {
866 sctx->desired_language = (gchar*) malloc (strlen(value) + 1);
867 strcpy(sctx->desired_language, value);
868 GST_LOG("no language list was found and desired lang was set to %s",sctx->desired_language);
871 if (sctx->language_changed)
/* Restore the time saved by handle_start_sync () in time3. */
874 sctx->time2 = sctx->time3;
875 sctx->language_changed = FALSE;
/* Build sctx->lang_list from the key/value pairs in `atts`: each pair
 * becomes one GstLangStruct holding copies of the key and the value. The
 * first key also becomes desired_language when none is set. A list that
 * has already been filled is detected and logged. */
884 handle_start_language_list (GstSamiContext * sctx, const gchar ** atts)
888 GstLangStruct *new = NULL;
889 GstLangStruct *temp = NULL;
892 if (g_list_length (sctx->lang_list)) {
893 GST_LOG ("We already got the language list");
/* The scan relies on a NULL entry terminating atts. */
896 for (i = 0; (atts[attrIndex] != NULL); i++) {
897 const gchar *key, *value;
899 key = atts[attrIndex++];
900 value = atts[attrIndex++];
902 GST_LOG ("Inside handle_start_language_list key: %s, value: %s", key, value);
907 new = g_new0 (GstLangStruct, 1);
/* NOTE(review): strlen (value) and strlen (key) run before the NULL tests
 * on value and key in the following conditions - confirm that an earlier
 * check, not visible here, rules out NULL. */
908 new->language_code = (gchar*) malloc (strlen(value) + 1);
909 if (new->language_code && value)
910 strcpy (new->language_code, value);
911 new->language_key = (gchar*) malloc (strlen(key) + 1);
912 if (new->language_key && key)
913 strcpy (new->language_key, key);
914 sctx->lang_list = g_list_append (sctx->lang_list, new);
/* Fetch the element just appended; it is used for logging below. */
915 temp = g_list_nth_data (sctx->lang_list, i);
916 if (sctx->desired_language == NULL && key){
/* NOTE(review): the malloc result is copied into without a visible NULL
 * check - confirm. */
917 sctx->desired_language = (gchar*) malloc (strlen(key) + 1);
918 strcpy(sctx->desired_language, key);
922 GST_LOG ("Inside handle_start_language_list of glist key: %s, value: %s",
923 temp->language_key, temp->language_code);
/* start_element callback of the SAMI parser: dispatches on the
 * case-insensitive element name. `user_data` is the GstSamiContext.
 * With SUBPARSE_MODIFICATION the output for "br" and "i" is guarded by a
 * test that current_language equals desired_language. */
930 handle_start_element (HtmlContext * ctx, const gchar * name,
931 const char **atts, gpointer user_data)
933 GstSamiContext *sctx = (GstSamiContext *) user_data;
935 GST_LOG ("name:%s", name);
937 if (!g_ascii_strcasecmp ("sync", name)) {
938 handle_start_sync (sctx, atts);
939 sctx->in_sync = TRUE;
940 } else if (!g_ascii_strcasecmp ("font", name)) {
941 handle_start_font (sctx, atts);
942 } else if (!g_ascii_strcasecmp ("ruby", name)) {
943 sami_context_push_state (sctx, RUBY_TAG);
944 } else if (!g_ascii_strcasecmp ("br", name)) {
945 #ifdef SUBPARSE_MODIFICATION
946 if (sctx->current_language && sctx->desired_language &&
947 !strcmp(sctx->current_language, sctx->desired_language))
/* A line break becomes a newline in the collected text. */
949 g_string_append_c (sctx->buf, '\n');
950 /* FIXME: support for furigana/ruby once implemented in pango */
951 } else if (!g_ascii_strcasecmp ("rt", name)) {
/* Ruby text is collected in rubybuf, inheriting an open italic tag. */
952 if (has_tag (sctx->state, ITALIC_TAG)) {
953 g_string_append (sctx->rubybuf, "<i>");
955 g_string_append (sctx->rubybuf, "<span size='xx-small' rise='-100'>");
956 sami_context_push_state (sctx, RT_TAG);
957 } else if (!g_ascii_strcasecmp ("i", name)) {
958 #ifdef SUBPARSE_MODIFICATION
959 if (sctx->current_language && sctx->desired_language &&
960 !strcmp(sctx->current_language, sctx->desired_language))
962 g_string_append (sctx->buf, "<i>");
963 sami_context_push_state (sctx, ITALIC_TAG);
964 } else if (!g_ascii_strcasecmp ("p", name)) {
965 #ifdef SUBPARSE_MODIFICATION
966 handle_p (sctx, atts);
/* "!--P" is the name html_context_handle_element () gives to the comment
 * that carries the language list. */
967 } else if (!g_ascii_strcasecmp ("!--P", name)) {
968 handle_start_language_list (sctx, atts);
/* end_element callback of the SAMI parser: leaves the sync state, flushes
 * pending text when the body or the document ends, and closes font, ruby
 * and italic tags via sami_context_pop_state (). `user_data` is the
 * GstSamiContext. */
974 handle_end_element (HtmlContext * ctx, const char *name, gpointer user_data)
976 GstSamiContext *sctx = (GstSamiContext *) user_data;
978 GST_LOG ("name:%s", name);
980 if (!g_ascii_strcasecmp ("sync", name)) {
981 sctx->in_sync = FALSE;
982 } else if ((!g_ascii_strcasecmp ("body", name)) ||
983 (!g_ascii_strcasecmp ("sami", name))) {
984 /* We will usually have one buffer left when the body is closed
985 * as we need the next sync to actually send it */
987 #ifdef SUBPARSE_MODIFICATION
988 sctx->end_body = TRUE;
991 if (sctx->buf->len != 0) {
992 /* Only set a new start time if we don't have text pending */
993 if (sctx->resultbuf->len == 0)
994 sctx->time1 = sctx->time2;
/* No following sync exists, so no end time is known for this text. */
996 sctx->time2 = GST_CLOCK_TIME_NONE;
997 g_string_append (sctx->resultbuf, sctx->buf->str);
998 sctx->has_result = (sctx->resultbuf->len != 0) ? TRUE : FALSE;
999 g_string_truncate (sctx->buf, 0);
1001 } else if (!g_ascii_strcasecmp ("font", name)) {
1002 sami_context_pop_state (sctx, SPAN_TAG);
1003 } else if (!g_ascii_strcasecmp ("ruby", name)) {
1004 sami_context_pop_state (sctx, RUBY_TAG);
1005 } else if (!g_ascii_strcasecmp ("i", name)) {
1006 sami_context_pop_state (sctx, ITALIC_TAG);
/* text callback of the SAMI parser: text seen while an RT_TAG is open is
 * appended to rubybuf with a space on either side; the final append
 * targets buf. With SUBPARSE_MODIFICATION both appends additionally
 * require current_language to equal desired_language. */
1011 handle_text (HtmlContext * ctx, const gchar * text, gsize text_len,
1014 GstSamiContext *sctx = (GstSamiContext *) user_data;
1016 /* Skip everything except content of the sync elements */
1019 #ifdef SUBPARSE_MODIFICATION
1020 if (has_tag (sctx->state, RT_TAG) && (sctx->current_language && sctx->desired_language &&
1021 !strcmp(sctx->current_language, sctx->desired_language))) {
1023 if (has_tag (sctx->state, RT_TAG)) {
1025 g_string_append_c (sctx->rubybuf, ' ');
1026 g_string_append (sctx->rubybuf, text);
1027 g_string_append_c (sctx->rubybuf, ' ');
1029 #ifdef SUBPARSE_MODIFICATION
1030 if (sctx->current_language && sctx->desired_language &&
1031 !strcmp(sctx->current_language, sctx->desired_language))
1033 g_string_append (sctx->buf, text);
1037 static HtmlParser samiParser = {
1038 handle_start_element, /* start_element */
1039 handle_end_element, /* end_element */
1040 handle_text, /* text */
/* Create the SAMI parsing context for `state`: a zero-filled
 * GstSamiContext with its HTML parser context and four empty string
 * buffers, stored in state->user_data. Asserts that no context is
 * attached yet. */
1044 sami_context_init (ParserState * state)
1046 GstSamiContext *context;
1048 g_assert (state->user_data == NULL);
1050 context = g_new0 (GstSamiContext, 1);
/* The context itself is the user_data passed to every parser callback. */
1052 context->htmlctxt = html_context_new (&samiParser, context);
1053 context->buf = g_string_new ("");
1054 context->rubybuf = g_string_new ("");
1055 context->resultbuf = g_string_new ("");
1056 context->state = g_string_new ("");
1057 #ifdef SUBPARSE_MODIFICATION
1058 context->current_language = NULL;
1059 context->desired_language = NULL;
1060 context->lang_list = NULL;
1061 context->language_changed = FALSE;
1062 context->end_body = FALSE;
1064 state->user_data = context;
/* Tear down the SAMI context attached to `state`: frees the HTML parser
 * context, the four string buffers and, with SUBPARSE_MODIFICATION, the
 * strings held by the language list, the list itself and
 * current_language; finally detaches the context from `state`. */
1068 sami_context_deinit (ParserState * state)
1070 GstSamiContext *context = (GstSamiContext *) state->user_data;
1071 #ifdef SUBPARSE_MODIFICATION
1072 GstLangStruct *temp = NULL;
1076 html_context_free (context->htmlctxt);
1077 context->htmlctxt = NULL;
1078 g_string_free (context->buf, TRUE);
1079 g_string_free (context->rubybuf, TRUE);
1080 g_string_free (context->resultbuf, TRUE);
1081 g_string_free (context->state, TRUE);
1082 #ifdef SUBPARSE_MODIFICATION
1083 if (context->lang_list) {
/* Index-based walk over the list; each lookup starts from the head. */
1084 while ((temp = g_list_nth_data (context->lang_list, i))) {
1085 if (temp->language_code)
1086 free (temp->language_code);
1087 temp->language_code = NULL;
1088 if (temp->language_key)
1089 free (temp->language_key);
1090 temp->language_key = NULL;
/* NOTE(review): g_list_free () releases only the list links; confirm that
 * each GstLangStruct element is freed on a line not visible here. */
1094 g_list_free (context->lang_list);
1096 context->lang_list = NULL;
1098 if (context->current_language)
1099 free (context->current_language);
1100 context->current_language = NULL;
/* NOTE(review): desired_language is cleared without a visible free ();
 * confirm who owns that string at this point. */
1102 context->desired_language = NULL;
1105 state->user_data = NULL;
/* Reset the SAMI context attached to `state` for reuse: empties the four
 * string buffers (keeping their allocations) and clears the has_result
 * and in_sync flags. */
1110 sami_context_reset (ParserState * state)
1112 GstSamiContext *context = (GstSamiContext *) state->user_data;
1115 g_string_truncate (context->buf, 0);
1116 g_string_truncate (context->rubybuf, 0);
1117 g_string_truncate (context->resultbuf, 0);
1118 g_string_truncate (context->state, 0);
1119 context->has_result = FALSE;
1120 context->in_sync = FALSE;
1126 #ifdef SUBPARSE_MODIFICATION
/* Replace the context's desired_language with the language recorded in
 * `state`: current_language when set, otherwise msl_language. The
 * previous desired_language string is freed first. */
1128 sami_context_change_language (ParserState * state)
1130 GstSamiContext *context = (GstSamiContext *) state->user_data;
1131 GST_LOG ("**********desired language was %s**************", context->desired_language);
/* NOTE(review): parse_sami () stores context->desired_language in
 * state->current_language, so the pointer adopted below may be the very
 * one freed here. The adopted pointer is also owned by `state`, yet the
 * next call frees it through desired_language. Confirm ownership, or copy
 * the string instead of aliasing it. */
1132 free (context->desired_language);
1133 if(state->current_language) {
1134 context->desired_language = state->current_language;
1136 context->desired_language = state->msl_language;
1138 GST_LOG ("desired language changed to %s", context->desired_language);
/* Convert `len` bytes of `str` from `encoding` to UTF-8 using
 * g_convert_with_fallback (), with "*" substituted for characters that
 * cannot be represented. A leading UTF-8 byte order mark (EF BB BF) is
 * removed from the converted text. `consumed` and `err` are passed through
 * to the conversion call; `self` is used for debug logging only. */
1142 sami_convert_to_utf8 (const gchar * str, gsize len, const gchar * encoding,
1143 gsize * consumed, GError ** err, GstSubParse * self)
1147 /* The char cast is necessary in glib < 2.24 */
1149 g_convert_with_fallback (str, len, "UTF-8", encoding, (char *) "*",
1150 consumed, NULL, err);
1154 GST_DEBUG_OBJECT (self, "g_convert_with_fallback returns NULL");
1158 /* + 3 to skip UTF-8 BOM if it was added */
/* The move count len + 1 - 3 covers the text after the BOM plus the
 * terminating NUL; presumably len holds the length of `ret` by now -
 * the assignment is not visible here. */
1160 if (len >= 3 && (guint8) ret[0] == 0xEF && (guint8) ret[1] == 0xBB
1161 && (guint8) ret[2] == 0xBF)
1162 g_memmove (ret, ret + 3, len + 1 - 3);
/* Check which languages of `lang_list` are actually referenced by
 * "class=" attributes in the subtitle file. The file location is obtained
 * from the upstream peer with a custom "FileSrcURI" query, the file is
 * read in chunks of up to 1024 bytes (converted to UTF-8 when another
 * encoding was detected) and lower-cased before searching. Languages that
 * were never found are removed from the list. */
1168 sami_validate_langlist_body(GList * lang_list, GstSubParse * self){
1169 gchar * file_path_type = NULL;
1170 gchar * file_path = NULL;
1173 guint i = 0, found_count = 0;
1174 const guint list_len = g_list_length(lang_list);
/* NOTE(review): counter has MAX_LANGUAGE slots but is indexed with values
 * up to list_len - 1 below; no clamp of list_len is visible - confirm. */
1175 gboolean counter[MAX_LANGUAGE];
1178 gchar *language_code;
1179 gchar *language_key;
1183 GstStructure *structure;
1184 const GValue *value;
1185 structure = gst_structure_new ("FileSrcURI",
1186 "file-uri", G_TYPE_STRING, NULL, NULL);
1188 cquery = gst_query_new_application (GST_QUERY_CUSTOM, structure);
1190 if (!gst_pad_peer_query (self->sinkpad, cquery))
1192 GST_DEBUG_OBJECT (self, "failed to query SMI file path");
1193 gst_query_unref (cquery);
1196 structure = gst_query_get_structure (cquery);
1197 value = gst_structure_get_value (structure, "file-uri");
1198 file_path = g_strdup (g_value_get_string (value));
1200 if (file_path == NULL){
1201 GST_DEBUG_OBJECT (self, "could not parse the SMI file path");
1202 gst_query_unref (cquery);
1205 gst_query_unref (cquery);
1207 GST_INFO_OBJECT (self, "file path comes as %s", file_path);
/* The first four characters are compared against "file" below. */
1209 file_path_type = g_strndup ((gchar *) file_path, 4);
1210 GST_INFO_OBJECT (self, "received file path by query = %s,%s", file_path,file_path_type);
1211 if (!g_strcmp0(file_path_type, "file")){
1213 GST_INFO_OBJECT (self, "file path comes as %s", file_path);
1215 fp = fopen (file_path, "r");
1217 GST_DEBUG_OBJECT (self, "failed to open file");
1221 for(i=0;i<list_len;i++){
/* Reading stops at end of file or once every language has been found. */
1225 while(!feof(fp) && found_count < list_len){
1227 gsize * consumed = NULL;
1229 guint charCount = 0;
1230 gchar* result = NULL;
1232 gchar* temp_lang = NULL;
1233 gchar * temp1 = NULL;
1234 gchar *con_temp_lang = NULL;
1235 gchar *con_temp = NULL;
1236 gboolean conversion = TRUE;
1237 charCount = fread (line, sizeof(char), 1024, fp);
1239 GST_WARNING_OBJECT (self, "fread returned zero bytes");
1242 GST_DEBUG("value of detected encoding is %s and self encoding is %s",self->detected_encoding,self->encoding);
1243 if (self->detected_encoding && strcmp (self->detected_encoding, "UTF-8") && conversion){
1244 result = sami_convert_to_utf8 (line, charCount, self->detected_encoding, consumed, &err, self);
1246 if(result == NULL) {
1250 con_temp = g_utf8_strdown (result,strlen(result));
/* NOTE(review): the assignment below overwrites the only pointer to the
 * string allocated by g_utf8_strdown (), so it can no longer be freed
 * through con_temp - confirm that another reference exists. */
1253 con_temp = g_strstr_len(con_temp, strlen(con_temp),"class=");
/* NOTE(review): con_temp may be NULL after the search above, yet
 * con_temp + 1 and strlen (con_temp) are evaluated before the NULL test
 * that follows - confirm a guard exists on a line not visible here. */
1255 temp1 = g_strstr_len(con_temp+1, strlen(con_temp),"class=");
/* gap is the distance from this "class=" to the next one, or to the end
 * of the chunk when there is no further occurrence. */
1257 if(temp1 && con_temp){
1258 gap = strlen(con_temp)-strlen(temp1);
1259 }else if(con_temp) {
1260 gap = strlen(con_temp);
1265 for(i=0;i<list_len;i++){
1266 if(counter[i]==TRUE){
1267 con_temp=con_temp+1;
1270 lang = (struct LangStruct *) g_list_nth_data(lang_list,i);
1272 temp_lang = (gchar*)g_malloc(strlen(lang->language_key)+1);
1273 strcpy(temp_lang,lang->language_key);
1274 con_temp_lang = g_utf8_strdown (temp_lang,strlen(temp_lang));
/* Look for the lower-cased language key within the current gap. */
1275 if(g_strstr_len(con_temp,gap,con_temp_lang)){
1278 GST_INFO_OBJECT (self, " valid Language in list : [%s]", lang->language_key);
1279 con_temp=con_temp+1;
1282 g_free(con_temp_lang);
1294 if(found_count < list_len){
1295 for(i=0;i<list_len;i++){
1296 if(counter[i]==FALSE)
/* NOTE(review): deleting by index shifts the later elements, so index i
 * no longer matches counter[i] after the first removal. lang_list is also
 * a by-value parameter, so a changed list head is visible to the caller
 * only if it is returned - confirm both. */
1297 lang_list = g_list_delete_link(lang_list,g_list_nth(lang_list,i));
1306 parse_sami (ParserState * state, const gchar * line)
1309 #ifdef SUBPARSE_MODIFICATION
1310 gint64 clip_start = 0, clip_stop = 0;
1311 gboolean in_seg = FALSE;
1313 GstSamiContext *context = (GstSamiContext *) state->user_data;
1315 gchar *unescaped = unescape_string (line);
1316 html_context_parse (context->htmlctxt, (gchar *) unescaped,
1317 strlen (unescaped));
1318 #ifdef SUBPARSE_MODIFICATION
1319 if (context->lang_list)
1320 state->language_list = context->lang_list;
1322 if (context->desired_language)
1323 state->current_language = context->desired_language;
1326 #ifdef SUBPARSE_MODIFICATION
1327 if (context->desired_language && context->current_language) {
1328 if ((!strcmp(context->current_language, context->desired_language)) || context->end_body) {
1330 if (context->has_result) {
1331 if (context->rubybuf->len) {
1332 context->rubybuf = g_string_append_c (context->rubybuf, '\n');
1333 g_string_prepend (context->resultbuf, context->rubybuf->str);
1334 context->rubybuf = g_string_truncate (context->rubybuf, 0);
1337 ret = g_string_free (context->resultbuf, FALSE);
1338 context->resultbuf = g_string_new ("");
1339 state->start_time = context->time1;
1340 state->duration = context->time2 - context->time1;
1341 context->has_result = FALSE;
1343 #ifdef SUBPARSE_MODIFICATION
1344 context->end_body = FALSE;
1347 /* Check our segment start/stop */
1348 in_seg = gst_segment_clip (state->segment, GST_FORMAT_TIME,
1349 state->start_time, state->start_time + state->duration, &clip_start,
1352 /* No need to send that text if it's out of segment */
1354 state->start_time = clip_start;
1355 state->duration = clip_stop - clip_start;