1 /* gmarkup.c - Simple XML-like parser
3 * Copyright 2000 Red Hat, Inc.
5 * GLib is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU Lesser General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
10 * GLib is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with GLib; see the file COPYING.LIB. If not,
17 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
33 g_markup_error_quark (void)
35 static GQuark error_quark = 0;
38 error_quark = g_quark_from_static_string ("g-markup-error-quark");
/* States of the main parser state machine driven by
 * g_markup_parse_context_parse().  STATE_START is the initial state set by
 * g_markup_parse_context_new(); STATE_ERROR is terminal and is entered by
 * mark_error().
 */
typedef enum
{
  STATE_START,
  STATE_AFTER_OPEN_ANGLE,
  STATE_AFTER_CLOSE_ANGLE,
  STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
  STATE_INSIDE_OPEN_TAG_NAME,
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH,
  STATE_INSIDE_CLOSE_TAG_NAME,
  STATE_INSIDE_PASSTHROUGH,
  STATE_ERROR
} GMarkupParseState;
62 struct _GMarkupParseContext
64 const GMarkupParser *parser;
66 GMarkupParseFlags flags;
72 GDestroyNotify dnotify;
74 /* A piece of character data or an element that
75 * hasn't "ended" yet so we haven't yet called
76 * the callback for it.
78 GString *partial_chunk;
80 GMarkupParseState state;
87 const gchar *current_text;
88 gssize current_text_len;
89 const gchar *current_text_end;
91 GString *leftover_char_portion;
93 /* used to save the start of the last interesting thingy */
98 guint document_empty : 1;
103 * g_markup_parse_context_new:
104 * @parser: a #GMarkupParser
105 * @flags: one or more #GMarkupParseFlags
106 * @user_data: user data to pass to #GMarkupParser functions
107 * @user_data_dnotify: user data destroy notifier called when the parse context is freed
109 * Creates a new parse context. A parse context is used to parse
110 * marked-up documents. You can feed any number of documents into
111 * a context, as long as no errors occur; once an error occurs,
112 * the parse context can't continue to parse text (you have to free it
113 * and create a new parse context).
115 * Return value: a new #GMarkupParseContext
117 GMarkupParseContext *
118 g_markup_parse_context_new (const GMarkupParser *parser,
119 GMarkupParseFlags flags,
121 GDestroyNotify user_data_dnotify)
123 GMarkupParseContext *context;
125 g_return_val_if_fail (parser != NULL, NULL);
127 context = g_new (GMarkupParseContext, 1);
129 context->parser = parser;
130 context->flags = flags;
131 context->user_data = user_data;
132 context->dnotify = user_data_dnotify;
134 context->line_number = 1;
135 context->char_number = 1;
137 context->partial_chunk = NULL;
139 context->state = STATE_START;
140 context->tag_stack = NULL;
141 context->attr_names = NULL;
142 context->attr_values = NULL;
143 context->cur_attr = -1;
144 context->alloc_attrs = 0;
146 context->current_text = NULL;
147 context->current_text_len = -1;
148 context->current_text_end = NULL;
149 context->leftover_char_portion = NULL;
151 context->start = NULL;
152 context->iter = NULL;
154 context->document_empty = TRUE;
155 context->parsing = FALSE;
161 * g_markup_parse_context_free:
162 * @context: a #GMarkupParseContext
164 * Frees a #GMarkupParseContext. Can't be called from inside
165 * one of the #GMarkupParser functions.
169 g_markup_parse_context_free (GMarkupParseContext *context)
171 g_return_if_fail (context != NULL);
172 g_return_if_fail (!context->parsing);
174 if (context->dnotify)
175 (* context->dnotify) (context->user_data);
177 g_strfreev (context->attr_names);
178 g_strfreev (context->attr_values);
180 g_slist_foreach (context->tag_stack, (GFunc)g_free, NULL);
181 g_slist_free (context->tag_stack);
183 if (context->partial_chunk)
184 g_string_free (context->partial_chunk, TRUE);
186 if (context->leftover_char_portion)
187 g_string_free (context->leftover_char_portion, TRUE);
193 mark_error (GMarkupParseContext *context,
196 context->state = STATE_ERROR;
198 if (context->parser->error)
199 (*context->parser->error) (context, error, context->user_data);
203 set_error (GMarkupParseContext *context,
213 va_start (args, format);
214 s = g_strdup_vprintf (format, args);
217 tmp_error = g_error_new (G_MARKUP_ERROR,
219 _("Error on line %d char %d: %s"),
220 context->line_number,
221 context->char_number,
226 mark_error (context, tmp_error);
228 g_propagate_error (error, tmp_error);
/* Predicate used by the parser to decide whether the Unicode character @c
 * may begin an element name, attribute name or entity name.  Alphabetic
 * characters (g_unichar_isalpha) are accepted.
 * NOTE(review): the remaining alternatives of the condition and the return
 * statements are not visible in this excerpt - confirm against the full file.
 */
232 is_name_start_char (gunichar c)
234 if (g_unichar_isalpha (c) ||
/* Predicate used by the parser to decide whether the Unicode character @c
 * may appear inside a name after its first character.  Alphanumeric
 * characters (g_unichar_isalnum) are accepted.
 * NOTE(review): the remaining alternatives of the condition and the return
 * statements are not visible in this excerpt - confirm against the full file.
 */
243 is_name_char (gunichar c)
245 if (g_unichar_isalnum (c) ||
/* Encodes the Unicode character @c as UTF-8 into the caller-supplied
 * buffer.  Callers pass the result where a string is expected
 * (g_string_append, "%s" arguments).
 * NOTE(review): the buffer parameter, any terminator handling and the
 * return statement are not visible in this excerpt; g_unichar_to_utf8()
 * does not NUL-terminate its output, so confirm the buffer is zeroed or
 * terminated in the full file.
 */
257 char_str (gunichar c,
261 g_unichar_to_utf8 (c, buf);
266 utf8_str (const gchar *utf8,
269 char_str (g_utf8_get_char (utf8), buf);
274 set_unescape_error (GMarkupParseContext *context,
276 const gchar *remaining_text,
277 const gchar *remaining_text_end,
285 gint remaining_newlines;
288 remaining_newlines = 0;
290 while (p != remaining_text_end)
293 ++remaining_newlines;
297 va_start (args, format);
298 s = g_strdup_vprintf (format, args);
301 tmp_error = g_error_new (G_MARKUP_ERROR,
303 _("Error on line %d: %s"),
304 context->line_number - remaining_newlines,
309 mark_error (context, tmp_error);
311 g_propagate_error (error, tmp_error);
/* States of the small state machine used by unescape_text() to expand
 * entity and character references.
 * NOTE(review): USTATE_INSIDE_TEXT, which unescape_text() uses as its
 * initial state, and the enum's opening/closing lines are not visible in
 * this excerpt.
 */
317 USTATE_AFTER_AMPERSAND,
318 USTATE_INSIDE_ENTITY_NAME,
319 USTATE_AFTER_CHARREF_HASH
/* unescape_text:
 * Expands the entity references and numeric character references found in
 * the text that ends at @text_end, accumulating the result in a GString.
 *
 * - Plain text up to the next '&' is copied unchanged.
 * - The named entities lt, gt, amp, quot and apos are replaced by the
 *   single character they stand for; any other name is reported as
 *   "Entity name '%s' is not known".
 * - Numeric references are converted with strtoul() in base 16 or base 10
 *   and accepted only when the value falls in one of the ranges checked
 *   below (0x20-0xD7FF, 0xE000-0xFFFD, 0x10000-0x10FFFF; further
 *   alternatives of that condition are not visible here).
 * - Every failure goes through set_unescape_error(), which puts the
 *   context into STATE_ERROR; the loop stops as soon as that happens.
 *
 * On success the GString's buffer is handed to the caller through
 * *unescaped (the caller owns it); on error the GString is freed.
 *
 * NOTE(review): this excerpt is lossy - several declarations, braces and
 * branch conditions are missing.  The string literals on the lines
 * numbered 383, 485, 531 and 584 look HTML-entity-decoded (a bare '&'
 * where an escaped ampersand is expected, a literal e-circumflex where a
 * numeric reference example is expected); restore them from the pristine
 * file.
 * NOTE(review): the line numbered 421 passes a single byte (*p) to
 * is_name_char(), unlike other call sites which pass
 * g_utf8_get_char(); confirm multi-byte characters are handled as
 * intended.
 * NOTE(review): the line numbered 523 tests errno; confirm errno is reset
 * to 0 before the strtoul() calls in the full file.
 */
323 unescape_text (GMarkupParseContext *context,
325 const gchar *text_end,
329 #define MAX_ENT_LEN 5
335 str = g_string_new ("");
337 state = USTATE_INSIDE_TEXT;
340 while (p != text_end && context->state != STATE_ERROR)
342 g_assert (p < text_end);
346 case USTATE_INSIDE_TEXT:
348 while (p != text_end && *p != '&')
349 p = g_utf8_next_char (p);
353 g_string_append_len (str, start, p - start);
358 if (p != text_end && *p == '&')
360 p = g_utf8_next_char (p);
361 state = USTATE_AFTER_AMPERSAND;
366 case USTATE_AFTER_AMPERSAND:
370 p = g_utf8_next_char (p);
373 state = USTATE_AFTER_CHARREF_HASH;
375 else if (!is_name_start_char (g_utf8_get_char (p)))
379 set_unescape_error (context, error,
381 G_MARKUP_ERROR_PARSE,
382 _("Empty entity '&;' seen; valid "
383 "entities are: & " < > '"));
389 set_unescape_error (context, error,
391 G_MARKUP_ERROR_PARSE,
392 _("Character '%s' is not valid at "
393 "the start of an entity name; "
394 "the & character begins an entity; "
395 "if this ampersand isn't supposed "
396 "to be an entity, escape it as "
404 state = USTATE_INSIDE_ENTITY_NAME;
410 case USTATE_INSIDE_ENTITY_NAME:
412 gchar buf[MAX_ENT_LEN+1] = {
413 '\0', '\0', '\0', '\0', '\0', '\0'
417 while (p != text_end)
421 else if (!is_name_char (*p))
425 set_unescape_error (context, error,
427 G_MARKUP_ERROR_PARSE,
428 _("Character '%s' is not valid "
429 "inside an entity name"),
434 p = g_utf8_next_char (p);
437 if (context->state != STATE_ERROR)
452 /* move to after semicolon */
453 p = g_utf8_next_char (p);
455 state = USTATE_INSIDE_TEXT;
457 if (strcmp (buf, "lt") == 0)
458 g_string_append_c (str, '<');
459 else if (strcmp (buf, "gt") == 0)
460 g_string_append_c (str, '>');
461 else if (strcmp (buf, "amp") == 0)
462 g_string_append_c (str, '&');
463 else if (strcmp (buf, "quot") == 0)
464 g_string_append_c (str, '"');
465 else if (strcmp (buf, "apos") == 0)
466 g_string_append_c (str, '\'');
469 set_unescape_error (context, error,
471 G_MARKUP_ERROR_PARSE,
472 _("Entity name '%s' is not known"),
478 set_unescape_error (context, error,
479 /* give line number of the & */
481 G_MARKUP_ERROR_PARSE,
482 _("Entity did not end with a semicolon; "
483 "most likely you used an ampersand "
484 "character without intending to start "
485 "an entity - escape ampersand as &"));
491 case USTATE_AFTER_CHARREF_HASH:
493 gboolean is_hex = FALSE;
497 p = g_utf8_next_char (p);
501 while (p != text_end && *p != ';')
502 p = g_utf8_next_char (p);
506 g_assert (*p == ';');
508 /* digit is between start and p */
512 gchar *digit = g_strndup (start, p - start);
515 gchar *digit_end = digit + (p - start);
519 l = strtoul (digit, &end, 16);
521 l = strtoul (digit, &end, 10);
523 if (end != digit_end || errno != 0)
525 set_unescape_error (context, error,
527 G_MARKUP_ERROR_PARSE,
528 _("Failed to parse '%s', which "
529 "should have been a digit "
530 "inside a character reference "
531 "(ê for example) - perhaps "
532 "the digit is too large"),
537 /* characters XML permits */
541 (l >= 0x20 && l <= 0xD7FF) ||
542 (l >= 0xE000 && l <= 0xFFFD) ||
543 (l >= 0x10000 && l <= 0x10FFFF))
546 g_string_append (str, char_str (l, buf));
550 set_unescape_error (context, error,
552 G_MARKUP_ERROR_PARSE,
553 _("Character reference '%s' does not encode a permitted character"),
560 /* Move to next state */
561 p = g_utf8_next_char (p); /* past semicolon */
563 state = USTATE_INSIDE_TEXT;
567 set_unescape_error (context, error,
569 G_MARKUP_ERROR_PARSE,
570 _("Empty character reference; "
571 "should include a digit such as "
577 set_unescape_error (context, error,
579 G_MARKUP_ERROR_PARSE,
580 _("Character reference did not end with a "
582 "most likely you used an ampersand "
583 "character without intending to start "
584 "an entity - escape ampersand as &"));
590 g_assert_not_reached ();
595 if (context->state != STATE_ERROR)
599 case USTATE_INSIDE_TEXT:
601 case USTATE_AFTER_AMPERSAND:
602 case USTATE_INSIDE_ENTITY_NAME:
603 set_unescape_error (context, error,
605 G_MARKUP_ERROR_PARSE,
606 _("Unfinished entity reference"));
608 case USTATE_AFTER_CHARREF_HASH:
609 set_unescape_error (context, error,
611 G_MARKUP_ERROR_PARSE,
612 _("Unfinished character reference"));
617 if (context->state == STATE_ERROR)
619 g_string_free (str, TRUE);
625 *unescaped = g_string_free (str, FALSE);
633 advance_char (GMarkupParseContext *context)
636 context->iter = g_utf8_next_char (context->iter);
637 context->char_number += 1;
638 if (*context->iter == '\n')
640 context->line_number += 1;
641 context->char_number = 1;
644 return context->iter != context->current_text_end;
650 return c == ' ' || c == '\t' || c == '\n' || c == '\r';
654 skip_spaces (GMarkupParseContext *context)
658 if (!xml_isspace (*context->iter))
661 while (advance_char (context));
665 advance_to_name_end (GMarkupParseContext *context)
669 if (!is_name_char (g_utf8_get_char (context->iter)))
672 while (advance_char (context));
676 add_to_partial (GMarkupParseContext *context,
677 const gchar *text_start,
678 const gchar *text_end)
680 if (context->partial_chunk == NULL)
681 context->partial_chunk = g_string_new ("");
683 if (text_start != text_end)
684 g_string_append_len (context->partial_chunk, text_start,
685 text_end - text_start);
687 /* Invariant here that partial_chunk exists */
691 truncate_partial (GMarkupParseContext *context)
693 if (context->partial_chunk != NULL)
695 context->partial_chunk = g_string_truncate (context->partial_chunk, 0);
700 current_element (GMarkupParseContext *context)
702 return context->tag_stack->data;
706 current_attribute (GMarkupParseContext *context)
708 g_assert (context->cur_attr >= 0);
709 return context->attr_names[context->cur_attr];
713 find_current_text_end (GMarkupParseContext *context)
715 /* This function must be safe (non-segfaulting) on invalid UTF8 */
716 const gchar *end = context->current_text + context->current_text_len;
720 g_assert (context->current_text_len > 0);
722 p = context->current_text;
723 next = g_utf8_find_next_char (p, end);
728 next = g_utf8_find_next_char (p, end);
731 /* p is now the start of the last character or character portion. */
733 next = g_utf8_next_char (p); /* this only touches *p, nothing beyond */
737 /* whole character */
738 context->current_text_end = end;
743 context->leftover_char_portion = g_string_new_len (p, end - p);
744 context->current_text_len -= (end - p);
745 context->current_text_end = p;
750 add_attribute (GMarkupParseContext *context, char *name)
752 if (context->cur_attr + 2 >= context->alloc_attrs)
754 context->alloc_attrs += 5; /* silly magic number */
755 context->attr_names = g_realloc (context->attr_names, sizeof(char*)*context->alloc_attrs);
756 context->attr_values = g_realloc (context->attr_values, sizeof(char*)*context->alloc_attrs);
759 context->attr_names[context->cur_attr] = name;
760 context->attr_values[context->cur_attr] = NULL;
761 context->attr_names[context->cur_attr+1] = NULL;
762 context->attr_values[context->cur_attr+1] = NULL;
766 * g_markup_parse_context_parse:
767 * @context: a #GMarkupParseContext
768 * @text: chunk of text to parse
769 * @text_len: length of @text in bytes
770 * @error: return location for a #GError
772 * Feed some data to the #GMarkupParseContext. The data need not
773 * be valid UTF-8; an error will be signaled if it's invalid.
774 * The data need not be an entire document; you can feed a document
775 * into the parser incrementally, via multiple calls to this function.
776 * Typically, as you receive data from a network connection or file,
777 * you feed each received chunk of data into this function, aborting
778 * the process if an error occurs. Once an error is reported, no further
779 * data may be fed to the #GMarkupParseContext; all errors are fatal.
781 * Return value: %FALSE if an error occurred, %TRUE on success
/* g_markup_parse_context_parse - main entry point of the parser.
 *
 * Outline of what the visible code does:
 *
 * 1. Rejects a NULL context or text, a context already in STATE_ERROR,
 *    and re-entrant calls (context->parsing).  text_len may be computed
 *    with strlen(); the guarding condition is not visible in this
 *    excerpt (presumably a negative text_len - confirm).
 * 2. If the previous call ended in the middle of a UTF-8 character
 *    (leftover_char_portion), the missing bytes are taken from the start
 *    of this chunk and the completed character is parsed by calling this
 *    function recursively; the parsing flag is cleared around that call
 *    to allow the recursion.  A leftover portion longer than 7 bytes is
 *    reported as G_MARKUP_ERROR_BAD_UTF8.
 * 3. The chunk must begin at a character start; find_current_text_end()
 *    then trims a trailing partial character, and the remainder is
 *    checked with g_utf8_validate().  Invalid text is reported as
 *    G_MARKUP_ERROR_BAD_UTF8.
 * 4. A state machine runs over context->iter until current_text_end,
 *    switching on context->state.  Names, attribute values, text and
 *    passthrough that span chunk boundaries are accumulated in
 *    partial_chunk.  The start_element, end_element, text and
 *    passthrough callbacks of context->parser are invoked when present;
 *    an error set by a callback is passed to mark_error() and propagated.
 * 5. Clears context->parsing and returns whether the context is still
 *    outside STATE_ERROR.
 *
 * NOTE(review): this excerpt is lossy - many braces, declarations and
 * statements are missing.  On the lines numbered 1197 and 1198 the
 * initialiser reads as an empty-set symbol; it is most likely the address
 * of the local variable declared on the line numbered 1196, mangled by
 * HTML entity decoding.  Restore it from the pristine file.
 */
784 g_markup_parse_context_parse (GMarkupParseContext *context,
789 const gchar *first_invalid;
791 g_return_val_if_fail (context != NULL, FALSE);
792 g_return_val_if_fail (text != NULL, FALSE);
793 g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
794 g_return_val_if_fail (!context->parsing, FALSE);
797 text_len = strlen (text);
802 context->parsing = TRUE;
804 if (context->leftover_char_portion)
806 const gchar *first_char;
808 if ((*text & 0xc0) != 0x80)
811 first_char = g_utf8_find_next_char (text, text + text_len);
815 /* leftover_char_portion was completed. Parse it. */
816 GString *portion = context->leftover_char_portion;
818 g_string_append_len (context->leftover_char_portion,
819 text, first_char - text);
821 /* hacks to allow recursion */
822 context->parsing = FALSE;
823 context->leftover_char_portion = NULL;
825 if (!g_markup_parse_context_parse (context,
826 portion->str, portion->len,
829 g_assert (context->state == STATE_ERROR);
832 g_string_free (portion, TRUE);
833 context->parsing = TRUE;
835 /* Skip the fraction of char that was in this text */
836 text_len -= (first_char - text);
841 /* another little chunk of the leftover char; geez
842 * someone is inefficient.
844 g_string_append_len (context->leftover_char_portion,
847 if (context->leftover_char_portion->len > 7)
849 /* The leftover char portion is too big to be
854 G_MARKUP_ERROR_BAD_UTF8,
855 _("Invalid UTF-8 encoded text"));
862 context->current_text = text;
863 context->current_text_len = text_len;
864 context->iter = context->current_text;
865 context->start = context->iter;
867 /* Nothing left after finishing the leftover char, or nothing
868 * passed in to begin with.
870 if (context->current_text_len == 0)
873 /* find_current_text_end () assumes the string starts at
874 * a character start, so we need to validate at least
875 * that much. It doesn't assume any following bytes
878 if ((*context->current_text & 0xc0) == 0x80) /* not a char start */
882 G_MARKUP_ERROR_BAD_UTF8,
883 _("Invalid UTF-8 encoded text"));
887 /* Initialize context->current_text_end, possibly adjusting
888 * current_text_len, and add any leftover char portion
890 find_current_text_end (context);
892 /* Validate UTF8 (must be done after we find the end, since
893 * we could have a trailing incomplete char)
895 if (!g_utf8_validate (context->current_text,
896 context->current_text_len,
901 p = context->current_text;
902 while (p != context->current_text_end)
909 context->line_number += newlines;
913 G_MARKUP_ERROR_BAD_UTF8,
914 _("Invalid UTF-8 encoded text"));
918 while (context->iter != context->current_text_end)
920 switch (context->state)
923 /* Possible next state: AFTER_OPEN_ANGLE */
925 g_assert (context->tag_stack == NULL);
927 /* whitespace is ignored outside of any elements */
928 skip_spaces (context);
930 if (context->iter != context->current_text_end)
932 if (*context->iter == '<')
934 /* Move after the open angle */
935 advance_char (context);
937 context->state = STATE_AFTER_OPEN_ANGLE;
939 /* this could start a passthrough */
940 context->start = context->iter;
942 /* document is now non-empty */
943 context->document_empty = FALSE;
949 G_MARKUP_ERROR_PARSE,
950 _("Document must begin with an element (e.g. <book>)"));
955 case STATE_AFTER_OPEN_ANGLE:
956 /* Possible next states: INSIDE_OPEN_TAG_NAME,
957 * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
959 if (*context->iter == '?' ||
960 *context->iter == '!')
962 /* include < in the passthrough */
963 const gchar *openangle = "<";
964 add_to_partial (context, openangle, openangle + 1);
965 context->start = context->iter;
966 context->state = STATE_INSIDE_PASSTHROUGH;
968 else if (*context->iter == '/')
971 advance_char (context);
973 context->state = STATE_AFTER_CLOSE_TAG_SLASH;
975 else if (is_name_start_char (g_utf8_get_char (context->iter)))
977 context->state = STATE_INSIDE_OPEN_TAG_NAME;
979 /* start of tag name */
980 context->start = context->iter;
987 G_MARKUP_ERROR_PARSE,
988 _("'%s' is not a valid character following "
989 "a '<' character; it may not begin an "
991 utf8_str (context->iter, buf));
995 /* The AFTER_CLOSE_ANGLE state is actually sort of
996 * broken, because it doesn't correspond to a range
997 * of characters in the input stream as the others do,
998 * and thus makes things harder to conceptualize
1000 case STATE_AFTER_CLOSE_ANGLE:
1001 /* Possible next states: INSIDE_TEXT, STATE_START */
1002 if (context->tag_stack == NULL)
1004 context->start = NULL;
1005 context->state = STATE_START;
1009 context->start = context->iter;
1010 context->state = STATE_INSIDE_TEXT;
1014 case STATE_AFTER_ELISION_SLASH:
1015 /* Possible next state: AFTER_CLOSE_ANGLE */
1018 /* We need to pop the tag stack and call the end_element
1019 * function, since this is the close tag
1021 GError *tmp_error = NULL;
1023 g_assert (context->tag_stack != NULL);
1026 if (context->parser->end_element)
1027 (* context->parser->end_element) (context,
1028 context->tag_stack->data,
1034 mark_error (context, tmp_error);
1035 g_propagate_error (error, tmp_error);
1039 if (*context->iter == '>')
1041 /* move after the close angle */
1042 advance_char (context);
1043 context->state = STATE_AFTER_CLOSE_ANGLE;
1050 G_MARKUP_ERROR_PARSE,
1051 _("Odd character '%s', expected a '>' character "
1052 "to end the start tag of element '%s'"),
1053 utf8_str (context->iter, buf),
1054 current_element (context));
1058 g_free (context->tag_stack->data);
1059 context->tag_stack = g_slist_delete_link (context->tag_stack,
1060 context->tag_stack);
1064 case STATE_INSIDE_OPEN_TAG_NAME:
1065 /* Possible next states: BETWEEN_ATTRIBUTES */
1067 /* if there's a partial chunk then it's the first part of the
1068 * tag name. If there's a context->start then it's the start
1069 * of the tag name in current_text, the partial chunk goes
1070 * before that start though.
1072 advance_to_name_end (context);
1074 if (context->iter == context->current_text_end)
1076 /* The name hasn't necessarily ended. Merge with
1077 * partial chunk, leave state unchanged.
1079 add_to_partial (context, context->start, context->iter);
1083 /* The name has ended. Combine it with the partial chunk
1084 * if any; push it on the stack; enter next state.
1086 add_to_partial (context, context->start, context->iter);
1087 context->tag_stack =
1088 g_slist_prepend (context->tag_stack,
1089 g_string_free (context->partial_chunk,
1092 context->partial_chunk = NULL;
1094 context->state = STATE_BETWEEN_ATTRIBUTES;
1095 context->start = NULL;
1099 case STATE_INSIDE_ATTRIBUTE_NAME:
1100 /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1102 /* read the full name, if we enter the equals sign state
1103 * then add the attribute to the list (without the value),
1104 * otherwise store a partial chunk to be prepended later.
1106 advance_to_name_end (context);
1108 if (context->iter == context->current_text_end)
1110 /* The name hasn't necessarily ended. Merge with
1111 * partial chunk, leave state unchanged.
1113 add_to_partial (context, context->start, context->iter);
1117 /* The name has ended. Combine it with the partial chunk
1118 * if any; push it on the stack; enter next state.
1120 add_to_partial (context, context->start, context->iter);
1122 add_attribute (context, g_string_free (context->partial_chunk, FALSE));
1124 context->partial_chunk = NULL;
1125 context->start = NULL;
1127 if (*context->iter == '=')
1129 advance_char (context);
1130 context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1137 G_MARKUP_ERROR_PARSE,
1138 _("Odd character '%s', expected a '=' after "
1139 "attribute name '%s' of element '%s'"),
1140 utf8_str (context->iter, buf),
1141 current_attribute (context),
1142 current_element (context));
1148 case STATE_BETWEEN_ATTRIBUTES:
1149 /* Possible next states: AFTER_CLOSE_ANGLE,
1150 * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1152 skip_spaces (context);
1154 if (context->iter != context->current_text_end)
1156 if (*context->iter == '/')
1158 advance_char (context);
1159 context->state = STATE_AFTER_ELISION_SLASH;
1161 else if (*context->iter == '>')
1164 advance_char (context);
1165 context->state = STATE_AFTER_CLOSE_ANGLE;
1167 else if (is_name_start_char (g_utf8_get_char (context->iter)))
1169 context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1170 /* start of attribute name */
1171 context->start = context->iter;
1178 G_MARKUP_ERROR_PARSE,
1179 _("Odd character '%s', expected a '>' or '/' "
1180 "character to end the start tag of "
1181 "element '%s', or optionally an attribute; "
1182 "perhaps you used an invalid character in "
1183 "an attribute name"),
1184 utf8_str (context->iter, buf),
1185 current_element (context));
1188 /* If we're done with attributes, invoke
1189 * the start_element callback
1191 if (context->state == STATE_AFTER_ELISION_SLASH ||
1192 context->state == STATE_AFTER_CLOSE_ANGLE)
1194 const gchar *start_name;
1195 /* Ugly, but the current code expects an empty array instead of NULL */
1196 const gchar *empty = NULL;
1197 const gchar **attr_names = ∅
1198 const gchar **attr_values = ∅
1201 /* Call user callback for element start */
1202 start_name = current_element (context);
1204 if (context->cur_attr >= 0)
1206 attr_names = (const gchar**)context->attr_names;
1207 attr_values = (const gchar**)context->attr_values;
1211 if (context->parser->start_element)
1212 (* context->parser->start_element) (context,
1214 (const gchar **)attr_names,
1215 (const gchar **)attr_values,
1219 /* Go ahead and free the attributes. */
1220 for (; context->cur_attr >= 0; context->cur_attr--)
1222 int pos = context->cur_attr;
1223 g_free (context->attr_names[pos]);
1224 g_free (context->attr_values[pos]);
1225 context->attr_names[pos] = context->attr_values[pos] = NULL;
1227 g_assert (context->cur_attr == -1);
1228 g_assert (context->attr_names == NULL ||
1229 context->attr_names[0] == NULL);
1230 g_assert (context->attr_values == NULL ||
1231 context->attr_values[0] == NULL);
1233 if (tmp_error != NULL)
1235 mark_error (context, tmp_error);
1236 g_propagate_error (error, tmp_error);
1242 case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1243 /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
1244 if (*context->iter == '"')
1246 advance_char (context);
1247 context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
1248 context->start = context->iter;
1250 else if (*context->iter == '\'')
1252 advance_char (context);
1253 context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
1254 context->start = context->iter;
1261 G_MARKUP_ERROR_PARSE,
1262 _("Odd character '%s', expected an open quote mark "
1263 "after the equals sign when giving value for "
1264 "attribute '%s' of element '%s'"),
1265 utf8_str (context->iter, buf),
1266 current_attribute (context),
1267 current_element (context));
1271 case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1272 case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1273 /* Possible next states: BETWEEN_ATTRIBUTES */
1277 if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
1288 if (*context->iter == delim)
1291 while (advance_char (context));
1293 if (context->iter == context->current_text_end)
1295 /* The value hasn't necessarily ended. Merge with
1296 * partial chunk, leave state unchanged.
1298 add_to_partial (context, context->start, context->iter);
1302 /* The value has ended at the quote mark. Combine it
1303 * with the partial chunk if any; set it for the current
1306 add_to_partial (context, context->start, context->iter);
1308 g_assert (context->cur_attr >= 0);
1310 if (unescape_text (context,
1311 context->partial_chunk->str,
1312 context->partial_chunk->str +
1313 context->partial_chunk->len,
1314 &context->attr_values[context->cur_attr],
1317 /* success, advance past quote and set state. */
1318 advance_char (context);
1319 context->state = STATE_BETWEEN_ATTRIBUTES;
1320 context->start = NULL;
1323 truncate_partial (context);
1327 case STATE_INSIDE_TEXT:
1328 /* Possible next states: AFTER_OPEN_ANGLE */
1331 if (*context->iter == '<')
1334 while (advance_char (context));
1336 /* The text hasn't necessarily ended. Merge with
1337 * partial chunk, leave state unchanged.
1340 add_to_partial (context, context->start, context->iter);
1342 if (context->iter != context->current_text_end)
1344 gchar *unescaped = NULL;
1346 /* The text has ended at the open angle. Call the text
1350 if (unescape_text (context,
1351 context->partial_chunk->str,
1352 context->partial_chunk->str +
1353 context->partial_chunk->len,
1357 GError *tmp_error = NULL;
1359 if (context->parser->text)
1360 (*context->parser->text) (context,
1368 if (tmp_error == NULL)
1370 /* advance past open angle and set state. */
1371 advance_char (context);
1372 context->state = STATE_AFTER_OPEN_ANGLE;
1373 /* could begin a passthrough */
1374 context->start = context->iter;
1378 mark_error (context, tmp_error);
1379 g_propagate_error (error, tmp_error);
1383 truncate_partial (context);
1387 case STATE_AFTER_CLOSE_TAG_SLASH:
1388 /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1389 if (is_name_start_char (g_utf8_get_char (context->iter)))
1391 context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1393 /* start of tag name */
1394 context->start = context->iter;
1401 G_MARKUP_ERROR_PARSE,
1402 _("'%s' is not a valid character following "
1403 "the characters '</'; '%s' may not begin an "
1405 utf8_str (context->iter, buf),
1406 utf8_str (context->iter, buf));
1410 case STATE_INSIDE_CLOSE_TAG_NAME:
1411 /* Possible next state: AFTER_CLOSE_ANGLE */
1412 advance_to_name_end (context);
1414 if (context->iter == context->current_text_end)
1416 /* The name hasn't necessarily ended. Merge with
1417 * partial chunk, leave state unchanged.
1419 add_to_partial (context, context->start, context->iter);
1423 /* The name has ended. Combine it with the partial chunk
1424 * if any; check that it matches stack top and pop
1425 * stack; invoke proper callback; enter next state.
1429 add_to_partial (context, context->start, context->iter);
1431 close_name = g_string_free (context->partial_chunk, FALSE);
1432 context->partial_chunk = NULL;
1434 if (*context->iter != '>')
1439 G_MARKUP_ERROR_PARSE,
1440 _("'%s' is not a valid character following "
1441 "the close element name '%s'; the allowed "
1442 "character is '>'"),
1443 utf8_str (context->iter, buf),
1446 else if (context->tag_stack == NULL)
1450 G_MARKUP_ERROR_PARSE,
1451 _("Element '%s' was closed, no element "
1452 "is currently open"),
1455 else if (strcmp (close_name, current_element (context)) != 0)
1459 G_MARKUP_ERROR_PARSE,
1460 _("Element '%s' was closed, but the currently "
1461 "open element is '%s'"),
1463 current_element (context));
1468 advance_char (context);
1469 context->state = STATE_AFTER_CLOSE_ANGLE;
1470 context->start = NULL;
1472 /* call the end_element callback */
1474 if (context->parser->end_element)
1475 (* context->parser->end_element) (context,
1481 /* Pop the tag stack */
1482 g_free (context->tag_stack->data);
1483 context->tag_stack = g_slist_delete_link (context->tag_stack,
1484 context->tag_stack);
1488 mark_error (context, tmp_error);
1489 g_propagate_error (error, tmp_error);
1493 g_free (close_name);
1497 case STATE_INSIDE_PASSTHROUGH:
1498 /* Possible next state: AFTER_CLOSE_ANGLE */
1501 if (*context->iter == '>')
1504 while (advance_char (context));
1506 if (context->iter == context->current_text_end)
1508 /* The passthrough hasn't necessarily ended. Merge with
1509 * partial chunk, leave state unchanged.
1511 add_to_partial (context, context->start, context->iter);
1515 /* The passthrough has ended at the close angle. Combine
1516 * it with the partial chunk if any. Call the passthrough
1517 * callback. Note that the open/close angles are
1518 * included in the text of the passthrough.
1520 GError *tmp_error = NULL;
1522 advance_char (context); /* advance past close angle */
1523 add_to_partial (context, context->start, context->iter);
1525 if (context->parser->passthrough)
1526 (*context->parser->passthrough) (context,
1527 context->partial_chunk->str,
1528 context->partial_chunk->len,
1532 truncate_partial (context);
1534 if (tmp_error == NULL)
1536 context->state = STATE_AFTER_CLOSE_ANGLE;
1537 context->start = context->iter; /* could begin text */
1541 mark_error (context, tmp_error);
1542 g_propagate_error (error, tmp_error);
1552 g_assert_not_reached ();
1558 context->parsing = FALSE;
1560 return context->state != STATE_ERROR;
1564 * g_markup_parse_context_end_parse:
1565 * @context: a #GMarkupParseContext
1566 * @error: return location for a #GError
1568 * Signals to the #GMarkupParseContext that all data has been
1569 * fed into the parse context with g_markup_parse_context_parse().
1570 * This function reports an error if the document isn't complete,
1571 * for example if elements are still open.
1573 * Return value: %TRUE on success, %FALSE if an error was set
1576 g_markup_parse_context_end_parse (GMarkupParseContext *context,
1579 g_return_val_if_fail (context != NULL, FALSE);
1580 g_return_val_if_fail (!context->parsing, FALSE);
1581 g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1583 if (context->partial_chunk != NULL)
1585 g_string_free (context->partial_chunk, TRUE);
1586 context->partial_chunk = NULL;
1589 if (context->document_empty)
1591 set_error (context, error, G_MARKUP_ERROR_EMPTY,
1592 _("Document was empty or contained only whitespace"));
1596 context->parsing = TRUE;
1598 switch (context->state)
1604 case STATE_AFTER_OPEN_ANGLE:
1605 set_error (context, error, G_MARKUP_ERROR_PARSE,
1606 _("Document ended unexpectedly just after an open angle bracket '<'"));
1609 case STATE_AFTER_CLOSE_ANGLE:
1610 if (context->tag_stack != NULL)
1612 /* Error message the same as for INSIDE_TEXT */
1613 set_error (context, error, G_MARKUP_ERROR_PARSE,
1614 _("Document ended unexpectedly with elements still open - "
1615 "'%s' was the last element opened"),
1616 current_element (context));
1620 case STATE_AFTER_ELISION_SLASH:
1621 set_error (context, error, G_MARKUP_ERROR_PARSE,
1622 _("Document ended unexpectedly, expected to see a close angle "
1623 "bracket ending the tag <%s/>"), current_element (context));
1626 case STATE_INSIDE_OPEN_TAG_NAME:
1627 set_error (context, error, G_MARKUP_ERROR_PARSE,
1628 _("Document ended unexpectedly inside an element name"));
1631 case STATE_INSIDE_ATTRIBUTE_NAME:
1632 set_error (context, error, G_MARKUP_ERROR_PARSE,
1633 _("Document ended unexpectedly inside an attribute name"));
1636 case STATE_BETWEEN_ATTRIBUTES:
1637 set_error (context, error, G_MARKUP_ERROR_PARSE,
1638 _("Document ended unexpectedly inside an element-opening "
1642 case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1643 set_error (context, error, G_MARKUP_ERROR_PARSE,
1644 _("Document ended unexpectedly after the equals sign "
1645 "following an attribute name; no attribute value"));
1648 case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1649 case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1650 set_error (context, error, G_MARKUP_ERROR_PARSE,
1651 _("Document ended unexpectedly while inside an attribute "
1655 case STATE_INSIDE_TEXT:
1656 g_assert (context->tag_stack != NULL);
1657 set_error (context, error, G_MARKUP_ERROR_PARSE,
1658 _("Document ended unexpectedly with elements still open - "
1659 "'%s' was the last element opened"),
1660 current_element (context));
1663 case STATE_AFTER_CLOSE_TAG_SLASH:
1664 case STATE_INSIDE_CLOSE_TAG_NAME:
1665 set_error (context, error, G_MARKUP_ERROR_PARSE,
1666 _("Document ended unexpectedly inside the close tag for "
1667 "element '%s'"), current_element);
1670 case STATE_INSIDE_PASSTHROUGH:
1671 set_error (context, error, G_MARKUP_ERROR_PARSE,
1672 _("Document ended unexpectedly inside a comment or "
1673 "processing instruction"));
1678 g_assert_not_reached ();
1682 context->parsing = FALSE;
1684 return context->state != STATE_ERROR;
1688 * g_markup_parse_context_get_position:
1689 * @context: a #GMarkupParseContext
1690 * @line_number: return location for a line number, or %NULL
1691 * @char_number: return location for a char-on-line number, or %NULL
1693 * Retrieves the current line number and the number of the character on
1694 * that line. Intended for use in error messages; there are no strict
1695 * semantics for what constitutes the "current" line number other than
1696 * "the best number we could come up with for error messages."
1700 g_markup_parse_context_get_position (GMarkupParseContext *context,
1704 g_return_if_fail (context != NULL);
1707 *line_number = context->line_number;
1710 *char_number = context->char_number;
1714 append_escaped_text (GString *str,
1722 end = text + length;
1727 next = g_utf8_next_char (p);
1732 g_string_append (str, "&");
1736 g_string_append (str, "<");
1740 g_string_append (str, ">");
1744 g_string_append (str, "'");
1748 g_string_append (str, """);
1752 g_string_append_len (str, p, next - p);
1761 * g_markup_escape_text:
1762 * @text: some valid UTF-8 text
1763 * @length: length of @text in bytes
1765 * Escapes text so that the markup parser will parse it verbatim.
1766 * Less than, greater than, ampersand, etc. are replaced with the
1767 * corresponding entities. This function would typically be used
1768 * when writing out a file to be parsed with the markup parser.
1770 * Return value: escaped text
1773 g_markup_escape_text (const gchar *text,
1778 g_return_val_if_fail (text != NULL, NULL);
1781 length = strlen (text);
1783 str = g_string_new ("");
1784 append_escaped_text (str, text, length);
1786 return g_string_free (str, FALSE);