1 /* GStreamer SAMI subtitle parser
2 * Copyright (c) 2006, 2013 Young-Ho Cha <ganadist at gmail com>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
21 #include "samiparse.h"
/* Flag character recorded in GstSamiContext.state while an <i> element is open. */
27 #define ITALIC_TAG 'i'
/* Forward declarations for the tag tokenizer (HtmlParser, HtmlContext) and
 * for the SAMI parser state (GstSamiContext). */
33 typedef struct _HtmlParser HtmlParser;
34 typedef struct _HtmlContext HtmlContext;
35 typedef struct _GstSamiContext GstSamiContext;
36 #ifdef SUBPARSE_MODIFICATION
/* One entry of GstSamiContext.lang_list; handle_start_language_list() fills
 * its language_key and language_code strings. */
37 typedef struct _LanguageStruct GstLangStruct;
38 struct _LanguageStruct
/* Number of slots in the per-language "found" array used by
 * sami_validate_langlist_body(). */
43 #define MAX_LANGUAGE 10
/*
 * Parser state for one SAMI stream: the text accumulators, the string of
 * open-tag flags, the two most recent <sync> timestamps and, when
 * SUBPARSE_MODIFICATION is defined, the language-selection bookkeeping.
 */
45 struct _GstSamiContext
47 GString *buf; /* buffer to collect content */
48 GString *rubybuf; /* buffer to collect ruby content */
49 GString *resultbuf; /* when opening the next 'sync' tag, move
50 * from 'buf' to avoid to append following
52 GString *state; /* in many sami files there are tags that
53 * are not closed, so for each open tag the
54 * parser will append a tag flag here so
55 * that tags can be closed properly on
56 * 'sync' tags. See _context_push_state()
57 * and _context_pop_state(). */
58 HtmlContext *htmlctxt; /* html parser context */
59 gboolean has_result; /* set when ready to push out result */
60 gboolean in_sync; /* flag to avoid appending anything except the
61 * content of the sync elements to buf */
62 guint64 time1; /* previous start attribute in sync tag */
63 guint64 time2; /* current start attribute in sync tag */
64 #ifdef SUBPARSE_MODIFICATION
65 guint64 time3; /* start of the latest sync tag before clamping; copied back into time2 when the language changes */
66 GList *lang_list; /* Language list for an external subtitle file */
67 gchar *current_language; /* Current language parsed */
68 gchar *desired_language; /* Language set by user */
69 gboolean language_changed; /* language changed signal */
70 gboolean end_body; /* </BODY> reached */
/*
 * Callback table used by the tag tokenizer. start_element receives the
 * element name and an array of alternating attribute name/value strings,
 * end_element receives the element name, and text receives a run of
 * character data with its length. user_data is the pointer stored in the
 * HtmlContext by html_context_new().
 */
76 void (*start_element) (HtmlContext * ctx,
77 const gchar * name, const gchar ** attr, gpointer user_data);
78 void (*end_element) (HtmlContext * ctx,
79 const gchar * name, gpointer user_data);
80 void (*text) (HtmlContext * ctx,
81 const gchar * text, gsize text_len, gpointer user_data);
/* Tokenizer state member: the callback table this context dispatches to. */
86 const HtmlParser *parser;
92 html_context_new (HtmlParser * parser, gpointer user_data)
94 HtmlContext *ctxt = (HtmlContext *) g_new0 (HtmlContext, 1);
95 ctxt->parser = parser;
96 ctxt->user_data = user_data;
97 ctxt->buf = g_string_new (NULL);
102 html_context_free (HtmlContext * ctxt)
104 g_string_free (ctxt->buf, TRUE);
/* Pairs a Unicode code point with the entity spelling that
 * unescape_string() compares against the input. */
110 const gunichar unescaped;
111 const gchar *escaped;
/* Entities that unescape_string() re-emits behind '&' instead of decoding.
 * The lookup loop stops at the first entry whose escaped member is NULL. */
114 struct EntityMap XmlEntities[] = {
/* Entities that unescape_string() replaces with their code point. The
 * lookup loop stops at the first entry whose escaped member is NULL. */
123 struct EntityMap HtmlEntities[] = {
124 /* nbsp will handle manually
/*
 * Build a copy of @text with character references resolved before the
 * text is handed to the tag tokenizer:
 *  - "nbsp" (compared case-insensitively) becomes U+00A0;
 *  - names found in XmlEntities (case-insensitive) are written back behind
 *    an '&', i.e. they stay escaped;
 *  - names found in HtmlEntities (case-sensitive) become their code point;
 *  - numeric references are read with strtoul() in base 16 or base 10;
 *  - a run of ASCII whitespace is reduced to a single space;
 *  - every other character is copied unchanged.
 * Returns the character data of the internal GString; the caller owns it.
 * Several lines of this function are not visible in this view, so the
 * exact handling of '&', '#' and ';' is not described here.
 */
377 unescape_string (const gchar * text)
380 GString *unescaped = g_string_new (NULL);
386 /* unescape "&nbsp" and "&nbsp;" */
387 if (!g_ascii_strncasecmp (text, "nbsp", 4)) {
388 unescaped = g_string_append_unichar (unescaped, 160);
396 /* pass xml entities. these will be processed as pango markup */
397 for (i = 0; XmlEntities[i].escaped; i++) {
398 gssize len = strlen (XmlEntities[i].escaped);
399 if (!g_ascii_strncasecmp (text, XmlEntities[i].escaped, len)) {
400 unescaped = g_string_append_c (unescaped, '&');
402 g_string_append_len (unescaped, XmlEntities[i].escaped, len);
408 /* convert html entities */
409 for (i = 0; HtmlEntities[i].escaped; i++) {
410 gssize len = strlen (HtmlEntities[i].escaped);
411 if (!strncmp (text, HtmlEntities[i].escaped, len)) {
413 g_string_append_unichar (unescaped, HtmlEntities[i].unescaped);
420 gboolean is_hex = FALSE;
431 l = strtoul (text, &end, 16);
433 l = strtoul (text, &end, 10);
/* NOTE(review): the errno test below is only meaningful if errno was
 * cleared before strtoul(); confirm that happens on a line not shown here. */
436 if (text == end || errno != 0) {
437 /* error occurred. pass it */
/* NOTE(review): no range check on l is visible before it is appended as a
 * code point — confirm whether one is needed. */
440 unescaped = g_string_append_unichar (unescaped, l);
450 unescaped = g_string_append (unescaped, "&");
455 } else if (g_ascii_isspace (*text)) {
456 unescaped = g_string_append_c (unescaped, ' ');
457 /* strip whitespace */
460 } while ((*text) && g_ascii_isspace (*text));
462 unescaped = g_string_append_c (unescaped, *text);
467 return g_string_free (unescaped, FALSE);
471 string_token (const gchar * string, const gchar * delimiter, gchar ** first)
473 gchar *next = strstr (string, delimiter);
475 *first = g_strndup (string, next - string);
477 *first = g_strdup (string);
/*
 * Split the inside of a tag (@string) into an element name and an array of
 * alternating attribute name/value strings, then invoke the parser's
 * start_element callback and its end_element callback. The condition that
 * ties end_element to @must_close is not visible in this view — TODO
 * confirm.
 *
 * Surrounding double or single quotes are removed from attribute values.
 * With SUBPARSE_MODIFICATION a "!--" comment that is followed by 'P' or 'p'
 * is renamed to "!--P" and parsed as a language list: each "lang:" entry
 * yields a two-character language value plus a name copied from the
 * surrounding text.
 *
 * Many lines of this function are missing from this view; the notes below
 * are questions for review rather than established facts.
 */
483 html_context_handle_element (HtmlContext * ctxt,
484 const gchar * string, gboolean must_close)
489 const gchar *found, *next;
490 #ifdef SUBPARSE_MODIFICATION
491 const gchar *name_temp = NULL;
494 /* split element name and attributes */
495 next = string_token (string, " ", &name);
498 /* count attributes */
501 found = strchr (found, '=');
511 attrs = g_new0 (gchar *, (count + 1) * 2);
/* NOTE(review): count is the number of '=' characters, yet i advances by
 * two array slots per attribute, so this loop appears to parse only about
 * half of the attributes — confirm. */
513 for (i = 0; i < count; i += 2) {
514 gchar *attr_name = NULL, *attr_value = NULL;
517 #ifdef SUBPARSE_MODIFICATION
518 /* sometimes count can unnecessarily be high value, because of unrequired "=" in subtitle file.
519 * In that case it should not crash */
524 next = string_token (next + 1, "=", &attr_name);
526 #ifdef SUBPARSE_MODIFICATION
527 /* sometimes count can unnecessarily be high value, because of unrequired "=" in subtitle file.
528 * In that case it should not crash */
533 next = string_token (next + 1, " ", &attr_value);
535 /* strip " or ' from attribute value */
536 if (attr_value[0] == '"' || attr_value[0] == '\'') {
537 gchar *tmp = g_strdup (attr_value + 1);
/* NOTE(review): if attr_value is empty, length is 0 and the index below is
 * length - 1 — confirm whether an empty value can reach this point. */
542 length = strlen (attr_value);
543 if (attr_value[length - 1] == '"' || attr_value[length - 1] == '\'') {
544 attr_value[length - 1] = '\0';
547 attrs[i] = attr_name;
548 attrs[i + 1] = attr_value;
550 #ifdef SUBPARSE_MODIFICATION
551 /* sometimes spaces can be there in between !-- and P
552 * that also we have to take care */
553 if (!g_ascii_strcasecmp("!--", name)) {
554 gchar* tempchar = (gchar*)(string + 3);
555 while (*tempchar == ' ') {
557 if (*tempchar == 'P' || *tempchar == 'p') {
/* NOTE(review): name currently holds "!--"; writing index 3 replaces its
 * terminator. Confirm the buffer has room and is re-terminated on a line
 * not shown here. */
558 *(name + 3) = *tempchar;
565 if (next && (!g_ascii_strcasecmp("!--P", name))) {
568 /* count attributes */
571 found = (gchar*)strcasestr (found, "lang");
/* NOTE(review): unlike the (count + 1) * 2 allocation above, this array has
 * no spare NULL slot, while the attribute handlers stop at the first NULL
 * entry — confirm the array is always terminated. */
579 attrs = g_new0 (gchar *, count * 2);
581 for (i = 0; i < count; i++) {
582 gchar *attr_name = NULL, *attr_value = NULL;
584 next = (gchar*)strcasestr (next, "lang:");
/* NOTE(review): no check of the malloc() results is visible in this view. */
585 attr_value = (gchar*)malloc (3);
587 strncpy (attr_value, next, 2);
588 attr_value[2] = '\0';
589 GST_LOG ("Language value comes as %s", attr_value);
592 if (*name_temp == '{') {
593 int character_count = 0;
598 if (*name_temp == '.') {
599 attr_name = (gchar*) malloc (character_count + 1);
602 else if (*name_temp != ' ')
610 for (j = 0; *(name_temp + j) != ' '; j++) {
611 attr_name[j] = *(name_temp + j);
614 attrs[attrindex++] = attr_name;
615 attrs[attrindex++] = attr_value;
621 ctxt->parser->start_element (ctxt, name,
622 (const gchar **) attrs, ctxt->user_data);
624 ctxt->parser->end_element (ctxt, name, ctxt->user_data);
/*
 * Append @text_len bytes of @text to the context's pending buffer and
 * tokenise as much of the buffer as possible:
 *  - "<...>" with no '>' yet: stop and keep the data for the next call;
 *  - a tag taken through the "/" suffix branch has its last character
 *    removed and goes to html_context_handle_element() with must_close
 *    TRUE;
 *  - "</name>": the end_element callback is invoked directly;
 *  - any other tag goes to html_context_handle_element() with must_close
 *    FALSE;
 *  - text before the next '<' is stripped of surrounding whitespace and
 *    passed to the text callback;
 *  - text with no following '<' is stripped, passed to the text callback
 *    and the buffer is emptied.
 * Whatever was not consumed is stored back into the pending buffer.
 * The loop construct and several other lines are not visible in this view.
 */
631 html_context_parse (HtmlContext * ctxt, gchar * text, gsize text_len)
633 const gchar *next = NULL;
634 ctxt->buf = g_string_append_len (ctxt->buf, text, text_len);
635 next = ctxt->buf->str;
637 GST_ERROR ("ctxt->buf->str is NULL");
641 if (next[0] == '<') {
642 gchar *element = NULL;
643 /* find <blahblah> */
644 if (!strchr (next, '>')) {
645 /* no tag end point. buffer will be process in next time */
649 next = string_token (next, ">", &element);
/* NOTE(review): the suffix test looks at next (the rest of the buffer), not
 * at element, although the branch then edits element — confirm which one
 * was intended. */
651 if (g_str_has_suffix (next, "/")) {
653 element[strlen (element) - 1] = '\0';
654 html_context_handle_element (ctxt, element + 1, TRUE);
655 } else if (element[1] == '/') {
657 ctxt->parser->end_element (ctxt, element + 2, ctxt->user_data);
660 html_context_handle_element (ctxt, element + 1, FALSE);
663 } else if (strchr (next, '<')) {
666 next = string_token (next, "<", &text);
667 text = g_strstrip (text);
668 length = strlen (text);
669 ctxt->parser->text (ctxt, text, length, ctxt->user_data);
673 gchar *text = (gchar *) next;
675 text = g_strstrip (text);
676 length = strlen (text);
677 ctxt->parser->text (ctxt, text, length, ctxt->user_data);
678 ctxt->buf = g_string_assign (ctxt->buf, "");
683 ctxt->buf = g_string_assign (ctxt->buf, next);
687 has_tag (GString * str, const gchar tag)
689 return strrchr (str->str, tag);
693 sami_context_push_state (GstSamiContext * sctx, char state)
695 GST_LOG ("state %c", state);
696 g_string_append_c (sctx->state, state);
700 sami_context_pop_state (GstSamiContext * sctx, char state)
702 GString *str = g_string_new ("");
703 GString *context_state = sctx->state;
706 GST_LOG ("state %c", state);
707 for (i = context_state->len - 1; i >= 0; i--) {
708 switch (context_state->str[i]) {
709 case ITALIC_TAG: /* <i> */
711 g_string_append (str, "</i>");
714 case SPAN_TAG: /* <span foreground= > */
716 g_string_append (str, "</span>");
719 case RUBY_TAG: /* <span size= > -- ruby */
723 case RT_TAG: /* ruby */
725 /* FIXME: support for furigana/ruby once implemented in pango */
726 g_string_append (sctx->rubybuf, "</span>");
727 if (has_tag (context_state, ITALIC_TAG)) {
728 g_string_append (sctx->rubybuf, "</i>");
736 if (context_state->str[i] == state) {
737 g_string_append (sctx->buf, str->str);
738 g_string_free (str, TRUE);
739 g_string_truncate (context_state, i);
743 if (state == CLEAR_TAG) {
744 g_string_append (sctx->buf, str->str);
745 g_string_truncate (context_state, 0);
747 g_string_free (str, TRUE);
751 handle_start_sync (GstSamiContext * sctx, const gchar ** atts)
755 sami_context_pop_state (sctx, CLEAR_TAG);
757 for (i = 0; (atts[i] != NULL); i += 2) {
758 const gchar *key, *value;
765 if (!g_ascii_strcasecmp ("start", key)) {
766 /* Only set a new start time if we don't have text pending */
767 if (sctx->resultbuf->len == 0)
768 sctx->time1 = sctx->time2;
770 sctx->time2 = atoi ((const char *) value) * GST_MSECOND;
771 #ifdef SUBPARSE_MODIFICATION
772 sctx->time3 = sctx->time2;
774 sctx->time2 = MAX (sctx->time2, sctx->time1);
775 g_string_append (sctx->resultbuf, sctx->buf->str);
776 sctx->has_result = (sctx->resultbuf->len != 0) ? TRUE : FALSE;
777 g_string_truncate (sctx->buf, 0);
/*
 * Handle an opening <font> tag by emitting a Pango "<span ...>" into buf.
 *
 * Any <span> still open is closed first. A "color" attribute becomes
 * foreground="...": a six-digit hexadecimal value without a leading '#'
 * gets one prepended, and a fixed set of colour names (aqua, crimson,
 * fuchsia, indigo, lime, olive, silver, teal) is mapped explicitly. The
 * replacement values are on lines not visible in this view. A "face"
 * attribute becomes font_family="...". Finally the span is recorded as
 * open with SPAN_TAG.
 *
 * NOTE(review): attribute values are written into the markup as-is; a
 * value containing a quote or '<' would presumably produce invalid markup —
 * confirm whether escaping is needed.
 */
784 handle_start_font (GstSamiContext * sctx, const gchar ** atts)
788 sami_context_pop_state (sctx, SPAN_TAG);
790 g_string_append (sctx->buf, "<span");
791 for (i = 0; (atts[i] != NULL); i += 2) {
792 const gchar *key, *value;
799 if (!g_ascii_strcasecmp ("color", key)) {
801 * There are invalid color value in many
803 * It will fix hex color value that start without '#'
805 const gchar *sharp = "";
806 int len = strlen (value);
808 if (!(*value == '#' && len == 7)) {
811 /* check if it looks like hex */
812 if (strtol ((const char *) value, &r, 16) >= 0 &&
813 ((gchar *) r == (value + 6) && len == 6)) {
817 /* some colours can be found in many sami files, but X RGB database
818 * doesn't contain a colour by this name, so map explicitly */
819 if (!g_ascii_strcasecmp ("aqua", value)) {
821 } else if (!g_ascii_strcasecmp ("crimson", value)) {
823 } else if (!g_ascii_strcasecmp ("fuchsia", value)) {
825 } else if (!g_ascii_strcasecmp ("indigo", value)) {
827 } else if (!g_ascii_strcasecmp ("lime", value)) {
829 } else if (!g_ascii_strcasecmp ("olive", value)) {
831 } else if (!g_ascii_strcasecmp ("silver", value)) {
833 } else if (!g_ascii_strcasecmp ("teal", value)) {
836 g_string_append_printf (sctx->buf, " foreground=\"%s%s\"", sharp,
838 } else if (!g_ascii_strcasecmp ("face", key)) {
839 g_string_append_printf (sctx->buf, " font_family=\"%s\"", value);
842 g_string_append_c (sctx->buf, '>');
843 sami_context_push_state (sctx, SPAN_TAG);
847 #ifdef SUBPARSE_MODIFICATION
/*
 * Handle an opening <p> tag (SUBPARSE_MODIFICATION builds only).
 *
 * For each attribute pair: a value that differs from current_language
 * while time1 equals time2 raises language_changed. The value of a "class"
 * attribute is copied into current_language, and it also becomes
 * desired_language when none has been chosen yet. When language_changed is
 * set, time2 is restored from time3 and the flag is cleared.
 *
 * Some statements of this function are not visible in this view.
 */
849 handle_p (GstSamiContext * sctx, const gchar ** atts)
854 for (i = 0; (atts[i] != NULL); i += 2) {
855 const gchar *key, *value;
860 if (sctx->current_language && value && strcmp(sctx->current_language, value)
861 && (sctx->time1 == sctx->time2))
862 sctx->language_changed = TRUE;
864 else if (!sctx->current_language)
/* NOTE(review): the result of malloc() is not checked in the visible lines. */
865 sctx->current_language = (gchar*) malloc (128);
867 if (key && !g_ascii_strcasecmp ("class", key) && value) {
/* NOTE(review): value comes from the subtitle file and is copied without a
 * length limit into the 128-byte buffer allocated above — a longer class
 * name would overflow it. A bounded copy looks necessary; confirm. */
868 strcpy (sctx->current_language, value);
869 if (sctx->desired_language == NULL && key) {
870 sctx->desired_language = g_strdup(value);
871 GST_LOG("no language list was found and desired lang was set to %s",sctx->desired_language);
874 if (sctx->language_changed)
877 sctx->time2 = sctx->time3;
878 sctx->language_changed = FALSE;
/*
 * Build the language list from the attribute pairs produced for a "!--P"
 * element (SUBPARSE_MODIFICATION builds only).
 *
 * Does nothing more than log when the list already has entries. Otherwise
 * every key/value pair is copied into a new GstLangStruct (key into
 * language_key, value into language_code) and appended to lang_list. The
 * first key also becomes desired_language when none is set.
 *
 * Some statements of this function are not visible in this view.
 */
887 handle_start_language_list (GstSamiContext * sctx, const gchar ** atts)
891 GstLangStruct *new = NULL;
892 GstLangStruct *temp = NULL;
895 if (g_list_length (sctx->lang_list)) {
896 GST_LOG ("We already got the language list");
899 for (i = 0; (atts[attrIndex] != NULL); i++) {
900 const gchar *key, *value;
902 key = atts[attrIndex++];
903 value = atts[attrIndex++];
905 GST_LOG ("Inside handle_start_language_list key: %s, value: %s", key, value);
910 new = g_new0 (GstLangStruct, 1);
/* NOTE(review): strlen() runs on value and key before the NULL tests that
 * follow each allocation; unless an earlier line not shown here rejects
 * NULL, those tests come too late — confirm. */
911 new->language_code = (gchar*) malloc (strlen(value) + 1);
912 if (new->language_code && value)
913 strcpy (new->language_code, value);
914 new->language_key = (gchar*) malloc (strlen(key) + 1);
915 if (new->language_key && key)
916 strcpy (new->language_key, key);
917 sctx->lang_list = g_list_append (sctx->lang_list, new);
918 temp = g_list_nth_data (sctx->lang_list, i);
919 if (sctx->desired_language == NULL && key){
920 sctx->desired_language = g_strdup(key);
924 GST_LOG ("Inside handle_start_language_list of glist key: %s, value: %s",
925 temp->language_key, temp->language_code);
932 handle_start_element (HtmlContext * ctx, const gchar * name,
933 const char **atts, gpointer user_data)
935 GstSamiContext *sctx = (GstSamiContext *) user_data;
937 GST_LOG ("name:%s", name);
939 if (!g_ascii_strcasecmp ("sync", name)) {
940 handle_start_sync (sctx, atts);
941 sctx->in_sync = TRUE;
942 } else if (!g_ascii_strcasecmp ("font", name)) {
943 handle_start_font (sctx, atts);
944 } else if (!g_ascii_strcasecmp ("ruby", name)) {
945 sami_context_push_state (sctx, RUBY_TAG);
946 } else if (!g_ascii_strcasecmp ("br", name)) {
947 #ifdef SUBPARSE_MODIFICATION
948 if (sctx->current_language && sctx->desired_language &&
949 !strcmp(sctx->current_language, sctx->desired_language))
951 g_string_append_c (sctx->buf, '\n');
952 /* FIXME: support for furigana/ruby once implemented in pango */
953 } else if (!g_ascii_strcasecmp ("rt", name)) {
954 if (has_tag (sctx->state, ITALIC_TAG)) {
955 g_string_append (sctx->rubybuf, "<i>");
957 g_string_append (sctx->rubybuf, "<span size='xx-small' rise='-100'>");
958 sami_context_push_state (sctx, RT_TAG);
959 } else if (!g_ascii_strcasecmp ("i", name)) {
960 #ifdef SUBPARSE_MODIFICATION
961 if (sctx->current_language && sctx->desired_language &&
962 !strcmp(sctx->current_language, sctx->desired_language))
964 g_string_append (sctx->buf, "<i>");
965 sami_context_push_state (sctx, ITALIC_TAG);
966 } else if (!g_ascii_strcasecmp ("p", name)) {
967 #ifdef SUBPARSE_MODIFICATION
968 handle_p (sctx, atts);
969 } else if (!g_ascii_strcasecmp ("!--P", name)) {
970 handle_start_language_list (sctx, atts);
976 handle_end_element (HtmlContext * ctx, const char *name, gpointer user_data)
978 GstSamiContext *sctx = (GstSamiContext *) user_data;
980 GST_LOG ("name:%s", name);
982 if (!g_ascii_strcasecmp ("sync", name)) {
983 sctx->in_sync = FALSE;
984 } else if ((!g_ascii_strcasecmp ("body", name)) ||
985 (!g_ascii_strcasecmp ("sami", name))) {
986 /* We will usually have one buffer left when the body is closed
987 * as we need the next sync to actually send it */
989 #ifdef SUBPARSE_MODIFICATION
990 sctx->end_body = TRUE;
993 if (sctx->buf->len != 0) {
994 /* Only set a new start time if we don't have text pending */
995 if (sctx->resultbuf->len == 0)
996 sctx->time1 = sctx->time2;
998 sctx->time2 = GST_CLOCK_TIME_NONE;
999 g_string_append (sctx->resultbuf, sctx->buf->str);
1000 sctx->has_result = (sctx->resultbuf->len != 0) ? TRUE : FALSE;
1001 g_string_truncate (sctx->buf, 0);
1003 } else if (!g_ascii_strcasecmp ("font", name)) {
1004 sami_context_pop_state (sctx, SPAN_TAG);
1005 } else if (!g_ascii_strcasecmp ("ruby", name)) {
1006 sami_context_pop_state (sctx, RUBY_TAG);
1007 } else if (!g_ascii_strcasecmp ("i", name)) {
1008 sami_context_pop_state (sctx, ITALIC_TAG);
1013 handle_text (HtmlContext * ctx, const gchar * text, gsize text_len,
1016 GstSamiContext *sctx = (GstSamiContext *) user_data;
1018 /* Skip everything except content of the sync elements */
1021 #ifdef SUBPARSE_MODIFICATION
1022 if (has_tag (sctx->state, RT_TAG) && (sctx->current_language && sctx->desired_language &&
1023 !strcmp(sctx->current_language, sctx->desired_language))) {
1025 if (has_tag (sctx->state, RT_TAG)) {
1027 g_string_append_c (sctx->rubybuf, ' ');
1028 g_string_append (sctx->rubybuf, text);
1029 g_string_append_c (sctx->rubybuf, ' ');
1031 #ifdef SUBPARSE_MODIFICATION
1032 if (sctx->current_language && sctx->desired_language &&
1033 !strcmp(sctx->current_language, sctx->desired_language))
1035 g_string_append (sctx->buf, text);
1039 static HtmlParser samiParser = {
1040 handle_start_element, /* start_element */
1041 handle_end_element, /* end_element */
1042 handle_text, /* text */
1046 sami_context_init (ParserState * state)
1048 GstSamiContext *context;
1050 g_assert (state->user_data == NULL);
1052 context = g_new0 (GstSamiContext, 1);
1054 context->htmlctxt = html_context_new (&samiParser, context);
1055 context->buf = g_string_new ("");
1056 context->rubybuf = g_string_new ("");
1057 context->resultbuf = g_string_new ("");
1058 context->state = g_string_new ("");
1059 #ifdef SUBPARSE_MODIFICATION
1060 context->current_language = NULL;
1061 context->desired_language = NULL;
1062 context->lang_list = NULL;
1063 context->language_changed = FALSE;
1064 context->end_body = FALSE;
1066 state->user_data = context;
/*
 * Tear down the SAMI parser context stored in @state->user_data: free the
 * tokenizer context and the four text buffers, and with
 * SUBPARSE_MODIFICATION release the strings of every language-list entry,
 * the list itself and current_language. @state->user_data is reset to
 * NULL.
 *
 * Some statements of this function are not visible in this view.
 */
1070 sami_context_deinit (ParserState * state)
1072 GstSamiContext *context = (GstSamiContext *) state->user_data;
1073 #ifdef SUBPARSE_MODIFICATION
1074 GstLangStruct *temp = NULL;
1078 html_context_free (context->htmlctxt);
1079 context->htmlctxt = NULL;
1080 g_string_free (context->buf, TRUE);
1081 g_string_free (context->rubybuf, TRUE);
1082 g_string_free (context->resultbuf, TRUE);
1083 g_string_free (context->state, TRUE);
1084 #ifdef SUBPARSE_MODIFICATION
1085 if (context->lang_list) {
/* NOTE(review): the entries themselves come from g_new0(); confirm that a
 * line not shown here releases temp and advances i. */
1086 while ((temp = g_list_nth_data (context->lang_list, i))) {
1087 if (temp->language_code)
1088 free (temp->language_code);
1089 temp->language_code = NULL;
1090 if (temp->language_key)
1091 free (temp->language_key);
1092 temp->language_key = NULL;
1096 g_list_free (context->lang_list);
1098 context->lang_list = NULL;
1100 if (context->current_language)
1101 free (context->current_language);
1102 context->current_language = NULL;
/* NOTE(review): desired_language is only reset here; no release is visible.
 * It may alias a string owned by the ParserState — confirm who owns it. */
1104 context->desired_language = NULL;
1107 state->user_data = NULL;
/*
 * Return the SAMI parser context stored in @state->user_data to its idle
 * state: empty the four text buffers and clear the has_result and in_sync
 * flags. The end of this function is not visible in this view, so further
 * fields may be reset as well.
 */
1112 sami_context_reset (ParserState * state)
1114 GstSamiContext *context = (GstSamiContext *) state->user_data;
1117 g_string_truncate (context->buf, 0);
1118 g_string_truncate (context->rubybuf, 0);
1119 g_string_truncate (context->resultbuf, 0);
1120 g_string_truncate (context->state, 0);
1121 context->has_result = FALSE;
1122 context->in_sync = FALSE;
1128 #ifdef SUBPARSE_MODIFICATION
1130 sami_context_change_language (ParserState * state)
1132 GstSamiContext *context = (GstSamiContext *) state->user_data;
1133 GST_LOG ("**********desired language was %s**************", context->desired_language);
1134 free (context->desired_language);
1135 if(state->current_language) {
1136 context->desired_language = state->current_language;
1138 context->desired_language = state->msl_language;
1140 GST_LOG ("desired language changed to %s", context->desired_language);
/*
 * Convert @len bytes of @str from @encoding to UTF-8 with
 * g_convert_with_fallback(), substituting "*" for characters that cannot
 * be represented. @consumed and @err are passed straight through to that
 * call. A leading UTF-8 byte-order mark (EF BB BF) is removed from the
 * result by shifting it three bytes down. @self is used for debug logging
 * when the conversion returns NULL.
 *
 * Several lines are not visible in this view, including the return
 * statements and any update of len before the byte-order-mark test.
 */
1144 sami_convert_to_utf8 (const gchar * str, gsize len, const gchar * encoding,
1145 gsize * consumed, GError ** err, GstSubParse * self)
1149 /* The char cast is necessary in glib < 2.24 */
1151 g_convert_with_fallback (str, len, "UTF-8", encoding, (char *) "*",
1152 consumed, NULL, err);
1156 GST_DEBUG_OBJECT (self, "g_convert_with_fallback returns NULL");
1160 /* + 3 to skip UTF-8 BOM if it was added */
/* NOTE(review): the move below is only in bounds if len holds the length
 * of ret, not of the input — confirm len is refreshed on a line not shown. */
1162 if (len >= 3 && (guint8) ret[0] == 0xEF && (guint8) ret[1] == 0xBB
1163 && (guint8) ret[2] == 0xBF)
1164 g_memmove (ret, ret + 3, len + 1 - 3);
/*
 * Check which entries of @lang_list are actually used in the subtitle file.
 *
 * The file location is requested from the upstream peer of @self->sinkpad
 * with an application query carrying a "FileSrcURI" structure; only
 * locations whose first four characters are "file" are opened. The file is
 * read in chunks of up to 1024 bytes; each chunk is converted to UTF-8 when
 * the detected encoding is not "UTF-8", lower-cased and searched for
 * "class=". A language counts as found when its lower-cased language_key
 * occurs within the span measured from that "class=" (up to the next one,
 * if any). Reading stops once every language has been found. Entries that
 * were never found are then unlinked from the list.
 *
 * Many lines of this function, including its return type and return
 * statements, are not visible in this view; the notes below are questions
 * for review rather than established facts.
 */
1170 sami_validate_langlist_body(GList * lang_list, GstSubParse * self){
1171 gchar * file_path_type = NULL;
1172 gchar * file_path = NULL;
1175 guint i = 0, found_count = 0;
1176 const guint list_len = g_list_length(lang_list);
/* NOTE(review): counter is indexed with i < list_len below; no visible line
 * limits list_len to MAX_LANGUAGE — confirm. */
1177 gboolean counter[MAX_LANGUAGE];
1180 gchar *language_code;
1181 gchar *language_key;
1185 GstStructure *structure;
1186 const GValue *value;
1187 structure = gst_structure_new ("FileSrcURI",
1188 "file-uri", G_TYPE_STRING, NULL, NULL);
1190 cquery = gst_query_new_application (GST_QUERY_CUSTOM, structure);
1192 if (!gst_pad_peer_query (self->sinkpad, cquery))
1194 GST_DEBUG_OBJECT (self, "failed to query SMI file path");
1195 gst_query_unref (cquery);
1198 structure = gst_query_get_structure (cquery);
1199 value = gst_structure_get_value (structure, "file-uri");
1200 file_path = g_strdup (g_value_get_string (value));
1202 if (file_path == NULL){
1203 GST_DEBUG_OBJECT (self, "could not parse the SMI file path");
1204 gst_query_unref (cquery);
1207 gst_query_unref (cquery);
1209 GST_INFO_OBJECT (self, "file path comes as %s", file_path);
1211 file_path_type = g_strndup ((gchar *) file_path, 4);
1212 GST_INFO_OBJECT (self, "received file path by query = %s,%s", file_path,file_path_type);
1213 if (!g_strcmp0(file_path_type, "file")){
1215 GST_INFO_OBJECT (self, "file path comes as %s", file_path);
/* NOTE(review): no fclose() is visible for this stream, and no release of
 * file_path or file_path_type — confirm they are released on lines not
 * shown. */
1217 fp = fopen (file_path, "r");
1219 GST_DEBUG_OBJECT (self, "failed to open file");
1223 for(i=0;i<list_len;i++){
1227 while(!feof(fp) && found_count < list_len){
1229 gsize * consumed = NULL;
1231 guint charCount = 0;
1232 gchar* result = NULL;
1234 gchar* temp_lang = NULL;
1235 gchar * temp1 = NULL;
1236 gchar *con_temp_lang = NULL;
1237 gchar *con_temp = NULL;
1238 gboolean conversion = TRUE;
1239 charCount = fread (line, sizeof(char), 1024, fp);
1241 GST_WARNING_OBJECT (self, "fread returned zero bytes");
1244 GST_DEBUG("value of detected encoding is %s and self encoding is %s",self->detected_encoding,self->encoding);
1245 if (self->detected_encoding && strcmp (self->detected_encoding, "UTF-8") && conversion){
1246 result = sami_convert_to_utf8 (line, charCount, self->detected_encoding, consumed, &err, self);
1248 if(result == NULL) {
/* NOTE(review): con_temp first holds the g_utf8_strdown() allocation and is
 * then moved to interior positions; unless the start is saved on a line
 * not shown here it can no longer be freed. */
1252 con_temp = g_utf8_strdown (result,strlen(result));
1255 con_temp = g_strstr_len(con_temp, strlen(con_temp),"class=");
/* NOTE(review): con_temp is NULL here when "class=" was not found, and the
 * search length exceeds what remains after con_temp + 1 — confirm a guard
 * exists. */
1257 temp1 = g_strstr_len(con_temp+1, strlen(con_temp),"class=");
1259 if(temp1 && con_temp){
1260 gap = strlen(con_temp)-strlen(temp1);
1261 }else if(con_temp) {
1262 gap = strlen(con_temp);
1267 for(i=0;i<list_len;i++){
1268 if(counter[i]==TRUE){
1269 con_temp=con_temp+1;
1272 lang = (struct LangStruct *) g_list_nth_data(lang_list,i);
1274 temp_lang = g_strdup(lang->language_key);
1275 con_temp_lang = g_utf8_strdown (temp_lang,strlen(temp_lang));
1276 if(g_strstr_len(con_temp,gap,con_temp_lang)){
1279 GST_INFO_OBJECT (self, " valid Language in list : [%s]", lang->language_key);
1280 con_temp=con_temp+1;
1283 g_free(con_temp_lang);
1295 if(found_count < list_len){
/* NOTE(review): deleting by index while i keeps counting shifts the
 * remaining entries, and the new list head is stored only in the local
 * lang_list parameter — confirm the caller sees the pruned list and that
 * the right links are removed. */
1296 for(i=0;i<list_len;i++){
1297 if(counter[i]==FALSE)
1298 lang_list = g_list_delete_link(lang_list,g_list_nth(lang_list,i));
1308 parse_sami (ParserState * state, const gchar * line)
1311 #ifdef SUBPARSE_MODIFICATION
1312 gint64 clip_start = 0, clip_stop = 0;
1313 gboolean in_seg = FALSE;
1315 GstSamiContext *context = (GstSamiContext *) state->user_data;
1317 gchar *unescaped = unescape_string (line);
1318 html_context_parse (context->htmlctxt, (gchar *) unescaped,
1319 strlen (unescaped));
1320 #ifdef SUBPARSE_MODIFICATION
1321 if (context->lang_list)
1322 state->language_list = context->lang_list;
1324 if (context->desired_language)
1325 state->current_language = context->desired_language;
1328 #ifdef SUBPARSE_MODIFICATION
1329 if (context->desired_language && context->current_language) {
1330 if ((!strcmp(context->current_language, context->desired_language)) || context->end_body) {
1332 if (context->has_result) {
1333 if (context->rubybuf->len) {
1334 context->rubybuf = g_string_append_c (context->rubybuf, '\n');
1335 g_string_prepend (context->resultbuf, context->rubybuf->str);
1336 context->rubybuf = g_string_truncate (context->rubybuf, 0);
1339 ret = g_string_free (context->resultbuf, FALSE);
1340 context->resultbuf = g_string_new ("");
1341 state->start_time = context->time1;
1342 state->duration = context->time2 - context->time1;
1343 context->has_result = FALSE;
1345 #ifdef SUBPARSE_MODIFICATION
1346 context->end_body = FALSE;
1349 /* Check our segment start/stop */
1350 in_seg = gst_segment_clip (state->segment, GST_FORMAT_TIME,
1351 state->start_time, state->start_time + state->duration, &clip_start,
1354 /* No need to send that text if it's out of segment */
1356 state->start_time = clip_start;
1357 state->duration = clip_stop - clip_start;