1 /* gmarkup.c - Simple XML-like parser
3 * Copyright 2000 Red Hat, Inc.
5 * GLib is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU Lesser General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
10 * GLib is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with GLib; see the file COPYING.LIB. If not,
17 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
33 g_markup_error_quark (void)
35 static GQuark error_quark = 0;
38 error_quark = g_quark_from_static_string ("g-markup-error-quark");
46 STATE_AFTER_OPEN_ANGLE,
47 STATE_AFTER_CLOSE_ANGLE,
48 STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
49 STATE_INSIDE_OPEN_TAG_NAME,
50 STATE_INSIDE_ATTRIBUTE_NAME,
51 STATE_BETWEEN_ATTRIBUTES,
52 STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
53 STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
54 STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
56 STATE_AFTER_CLOSE_TAG_SLASH,
57 STATE_INSIDE_CLOSE_TAG_NAME,
58 STATE_INSIDE_PASSTHROUGH,
62 struct _GMarkupParseContext
64 const GMarkupParser *parser;
66 GMarkupParseFlags flags;
72 GDestroyNotify dnotify;
74 /* A piece of character data or an element that
75 * hasn't "ended" yet so we haven't yet called
76 * the callback for it.
78 GString *partial_chunk;
80 GMarkupParseState state;
87 const gchar *current_text;
88 gssize current_text_len;
89 const gchar *current_text_end;
91 GString *leftover_char_portion;
93 /* used to save the start of the last interesting thingy */
98 guint document_empty : 1;
103 * g_markup_parse_context_new:
104 * @parser: a #GMarkupParser
105 * @flags: one or more #GMarkupParseFlags
106 * @user_data: user data to pass to #GMarkupParser functions
107 * @user_data_dnotify: user data destroy notifier called when the parse context is freed
109 * Creates a new parse context. A parse context is used to parse
110 * marked-up documents. You can feed any number of documents into
111 * a context, as long as no errors occur; once an error occurs,
112 * the parse context can't continue to parse text (you have to free it
113 * and create a new parse context).
115 * Return value: a new #GMarkupParseContext
117 GMarkupParseContext *
118 g_markup_parse_context_new (const GMarkupParser *parser,
119 GMarkupParseFlags flags,
121 GDestroyNotify user_data_dnotify)
123 GMarkupParseContext *context;
125 g_return_val_if_fail (parser != NULL, NULL);
127 context = g_new (GMarkupParseContext, 1);
129 context->parser = parser;
130 context->flags = flags;
131 context->user_data = user_data;
132 context->dnotify = user_data_dnotify;
134 context->line_number = 1;
135 context->char_number = 1;
137 context->partial_chunk = NULL;
139 context->state = STATE_START;
140 context->tag_stack = NULL;
141 context->attr_names = NULL;
142 context->attr_values = NULL;
143 context->cur_attr = -1;
144 context->alloc_attrs = 0;
146 context->current_text = NULL;
147 context->current_text_len = -1;
148 context->current_text_end = NULL;
149 context->leftover_char_portion = NULL;
151 context->start = NULL;
152 context->iter = NULL;
154 context->document_empty = TRUE;
155 context->parsing = FALSE;
161 * g_markup_parse_context_free:
162 * @context: a #GMarkupParseContext
164 * Frees a #GMarkupParseContext. Can't be called from inside
165 * one of the #GMarkupParser functions.
169 g_markup_parse_context_free (GMarkupParseContext *context)
171 g_return_if_fail (context != NULL);
172 g_return_if_fail (!context->parsing);
174 if (context->dnotify)
175 (* context->dnotify) (context->user_data);
177 g_strfreev (context->attr_names);
178 g_strfreev (context->attr_values);
180 g_slist_foreach (context->tag_stack, (GFunc)g_free, NULL);
181 g_slist_free (context->tag_stack);
183 if (context->partial_chunk)
184 g_string_free (context->partial_chunk, TRUE);
186 if (context->leftover_char_portion)
187 g_string_free (context->leftover_char_portion, TRUE);
193 mark_error (GMarkupParseContext *context,
196 context->state = STATE_ERROR;
198 if (context->parser->error)
199 (*context->parser->error) (context, error, context->user_data);
203 set_error (GMarkupParseContext *context,
213 va_start (args, format);
214 s = g_strdup_vprintf (format, args);
217 tmp_error = g_error_new (G_MARKUP_ERROR,
219 _("Error on line %d char %d: %s"),
220 context->line_number,
221 context->char_number,
226 mark_error (context, tmp_error);
228 g_propagate_error (error, tmp_error);
232 is_name_start_char (gunichar c)
234 if (g_unichar_isalpha (c) ||
243 is_name_char (gunichar c)
245 if (g_unichar_isalnum (c) ||
257 char_str (gunichar c,
261 g_unichar_to_utf8 (c, buf);
266 utf8_str (const gchar *utf8,
269 char_str (g_utf8_get_char (utf8), buf);
274 set_unescape_error (GMarkupParseContext *context,
276 const gchar *remaining_text,
277 const gchar *remaining_text_end,
285 gint remaining_newlines;
288 remaining_newlines = 0;
290 while (p != remaining_text_end)
293 ++remaining_newlines;
297 va_start (args, format);
298 s = g_strdup_vprintf (format, args);
301 tmp_error = g_error_new (G_MARKUP_ERROR,
303 _("Error on line %d: %s"),
304 context->line_number - remaining_newlines,
309 mark_error (context, tmp_error);
311 g_propagate_error (error, tmp_error);
317 USTATE_AFTER_AMPERSAND,
318 USTATE_INSIDE_ENTITY_NAME,
319 USTATE_AFTER_CHARREF_HASH
323 unescape_text (GMarkupParseContext *context,
325 const gchar *text_end,
329 #define MAX_ENT_LEN 5
335 str = g_string_new ("");
337 state = USTATE_INSIDE_TEXT;
340 while (p != text_end && context->state != STATE_ERROR)
342 g_assert (p < text_end);
346 case USTATE_INSIDE_TEXT:
348 while (p != text_end && *p != '&')
349 p = g_utf8_next_char (p);
353 g_string_append_len (str, start, p - start);
358 if (p != text_end && *p == '&')
360 p = g_utf8_next_char (p);
361 state = USTATE_AFTER_AMPERSAND;
366 case USTATE_AFTER_AMPERSAND:
370 p = g_utf8_next_char (p);
373 state = USTATE_AFTER_CHARREF_HASH;
375 else if (!is_name_start_char (g_utf8_get_char (p)))
379 set_unescape_error (context, error,
381 G_MARKUP_ERROR_PARSE,
382 _("Empty entity '&;' seen; valid "
383 "entities are: & " < > '"));
389 set_unescape_error (context, error,
391 G_MARKUP_ERROR_PARSE,
392 _("Character '%s' is not valid at "
393 "the start of an entity name; "
394 "the & character begins an entity; "
395 "if this ampersand isn't supposed "
396 "to be an entity, escape it as "
404 state = USTATE_INSIDE_ENTITY_NAME;
410 case USTATE_INSIDE_ENTITY_NAME:
412 gchar buf[MAX_ENT_LEN+1] = {
413 '\0', '\0', '\0', '\0', '\0', '\0'
417 while (p != text_end)
421 else if (!is_name_char (*p))
425 set_unescape_error (context, error,
427 G_MARKUP_ERROR_PARSE,
428 _("Character '%s' is not valid "
429 "inside an entity name"),
434 p = g_utf8_next_char (p);
437 if (context->state != STATE_ERROR)
452 /* move to after semicolon */
453 p = g_utf8_next_char (p);
455 state = USTATE_INSIDE_TEXT;
457 if (strcmp (buf, "lt") == 0)
458 g_string_append_c (str, '<');
459 else if (strcmp (buf, "gt") == 0)
460 g_string_append_c (str, '>');
461 else if (strcmp (buf, "amp") == 0)
462 g_string_append_c (str, '&');
463 else if (strcmp (buf, "quot") == 0)
464 g_string_append_c (str, '"');
465 else if (strcmp (buf, "apos") == 0)
466 g_string_append_c (str, '\'');
469 set_unescape_error (context, error,
471 G_MARKUP_ERROR_PARSE,
472 _("Entity name '%s' is not known"),
478 set_unescape_error (context, error,
479 /* give line number of the & */
481 G_MARKUP_ERROR_PARSE,
482 _("Entity did not end with a semicolon; "
483 "most likely you used an ampersand "
484 "character without intending to start "
485 "an entity - escape ampersand as &"));
491 case USTATE_AFTER_CHARREF_HASH:
493 gboolean is_hex = FALSE;
497 p = g_utf8_next_char (p);
501 while (p != text_end && *p != ';')
502 p = g_utf8_next_char (p);
506 g_assert (*p == ';');
508 /* digit is between start and p */
512 gchar *digit = g_strndup (start, p - start);
515 gchar *digit_end = digit + (p - start);
519 l = strtoul (digit, &end, 16);
521 l = strtoul (digit, &end, 10);
523 if (end != digit_end || errno != 0)
525 set_unescape_error (context, error,
527 G_MARKUP_ERROR_PARSE,
528 _("Failed to parse '%s', which "
529 "should have been a digit "
530 "inside a character reference "
531 "(ê for example) - perhaps "
532 "the digit is too large"),
537 /* characters XML permits */
541 (l >= 0x20 && l <= 0xD7FF) ||
542 (l >= 0xE000 && l <= 0xFFFD) ||
543 (l >= 0x10000 && l <= 0x10FFFF))
546 g_string_append (str, char_str (l, buf));
550 set_unescape_error (context, error,
552 G_MARKUP_ERROR_PARSE,
553 _("Character reference '%s' does not encode a permitted character"),
560 /* Move to next state */
561 p = g_utf8_next_char (p); /* past semicolon */
563 state = USTATE_INSIDE_TEXT;
567 set_unescape_error (context, error,
569 G_MARKUP_ERROR_PARSE,
570 _("Empty character reference; "
571 "should include a digit such as "
577 set_unescape_error (context, error,
579 G_MARKUP_ERROR_PARSE,
580 _("Character reference did not end with a "
582 "most likely you used an ampersand "
583 "character without intending to start "
584 "an entity - escape ampersand as &"));
590 g_assert_not_reached ();
595 /* If no errors, we should have returned to USTATE_INSIDE_TEXT */
596 g_assert (context->state == STATE_ERROR ||
597 state == USTATE_INSIDE_TEXT);
599 if (context->state == STATE_ERROR)
601 g_string_free (str, TRUE);
607 *unescaped = g_string_free (str, FALSE);
615 advance_char (GMarkupParseContext *context)
618 context->iter = g_utf8_next_char (context->iter);
619 context->char_number += 1;
620 if (*context->iter == '\n')
622 context->line_number += 1;
623 context->char_number = 1;
626 return context->iter != context->current_text_end;
632 return c == ' ' || c == '\t' || c == '\n' || c == '\r';
636 skip_spaces (GMarkupParseContext *context)
640 if (!xml_isspace (*context->iter))
643 while (advance_char (context));
647 advance_to_name_end (GMarkupParseContext *context)
651 if (!is_name_char (g_utf8_get_char (context->iter)))
654 while (advance_char (context));
658 add_to_partial (GMarkupParseContext *context,
659 const gchar *text_start,
660 const gchar *text_end)
662 if (context->partial_chunk == NULL)
663 context->partial_chunk = g_string_new ("");
665 if (text_start != text_end)
666 g_string_append_len (context->partial_chunk, text_start,
667 text_end - text_start);
669 /* Invariant here that partial_chunk exists */
673 truncate_partial (GMarkupParseContext *context)
675 if (context->partial_chunk != NULL)
677 context->partial_chunk = g_string_truncate (context->partial_chunk, 0);
682 current_element (GMarkupParseContext *context)
684 return context->tag_stack->data;
688 current_attribute (GMarkupParseContext *context)
690 g_assert (context->cur_attr >= 0);
691 return context->attr_names[context->cur_attr];
695 find_current_text_end (GMarkupParseContext *context)
697 /* This function must be safe (non-segfaulting) on invalid UTF8 */
698 const gchar *end = context->current_text + context->current_text_len;
702 g_assert (context->current_text_len > 0);
704 p = context->current_text;
705 next = g_utf8_find_next_char (p, end);
710 next = g_utf8_find_next_char (p, end);
713 /* p is now the start of the last character or character portion. */
715 next = g_utf8_next_char (p); /* this only touches *p, nothing beyond */
719 /* whole character */
720 context->current_text_end = end;
725 context->leftover_char_portion = g_string_new_len (p, end - p);
726 context->current_text_len -= (end - p);
727 context->current_text_end = p;
732 add_attribute (GMarkupParseContext *context, char *name)
734 if (context->cur_attr + 2 >= context->alloc_attrs)
736 context->alloc_attrs += 5; /* silly magic number */
737 context->attr_names = g_realloc (context->attr_names, sizeof(char*)*context->alloc_attrs);
738 context->attr_values = g_realloc (context->attr_values, sizeof(char*)*context->alloc_attrs);
741 context->attr_names[context->cur_attr] = name;
742 context->attr_values[context->cur_attr] = NULL;
743 context->attr_names[context->cur_attr+1] = NULL;
744 context->attr_values[context->cur_attr+1] = NULL;
748 * g_markup_parse_context_parse:
749 * @context: a #GMarkupParseContext
750 * @text: chunk of text to parse
751 * @text_len: length of @text in bytes
752 * @error: return location for a #GError
754 * Feed some data to the #GMarkupParseContext. The data need not
755 * be valid UTF-8; an error will be signaled if it's invalid.
756 * The data need not be an entire document; you can feed a document
757 * into the parser incrementally, via multiple calls to this function.
758 * Typically, as you receive data from a network connection or file,
759 * you feed each received chunk of data into this function, aborting
760 * the process if an error occurs. Once an error is reported, no further
761 * data may be fed to the #GMarkupParseContext; all errors are fatal.
763 * Return value: %FALSE if an error occurred, %TRUE on success
766 g_markup_parse_context_parse (GMarkupParseContext *context,
771 const gchar *first_invalid;
773 g_return_val_if_fail (context != NULL, FALSE);
774 g_return_val_if_fail (text != NULL, FALSE);
775 g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
776 g_return_val_if_fail (!context->parsing, FALSE);
779 text_len = strlen (text);
784 context->parsing = TRUE;
786 if (context->leftover_char_portion)
788 const gchar *first_char;
790 if ((*text & 0xc0) != 0x80)
793 first_char = g_utf8_find_next_char (text, text + text_len);
797 /* leftover_char_portion was completed. Parse it. */
798 GString *portion = context->leftover_char_portion;
800 g_string_append_len (context->leftover_char_portion,
801 text, first_char - text);
803 /* hacks to allow recursion */
804 context->parsing = FALSE;
805 context->leftover_char_portion = NULL;
807 if (!g_markup_parse_context_parse (context,
808 portion->str, portion->len,
811 g_assert (context->state == STATE_ERROR);
814 g_string_free (portion, TRUE);
815 context->parsing = TRUE;
817 /* Skip the fraction of char that was in this text */
818 text_len -= (first_char - text);
823 /* another little chunk of the leftover char; geez
824 * someone is inefficient.
826 g_string_append_len (context->leftover_char_portion,
829 if (context->leftover_char_portion->len > 7)
831 /* The leftover char portion is too big to be
836 G_MARKUP_ERROR_BAD_UTF8,
837 _("Invalid UTF-8 encoded text"));
844 context->current_text = text;
845 context->current_text_len = text_len;
846 context->iter = context->current_text;
847 context->start = context->iter;
849 /* Nothing left after finishing the leftover char, or nothing
850 * passed in to begin with.
852 if (context->current_text_len == 0)
855 /* find_current_text_end () assumes the string starts at
856 * a character start, so we need to validate at least
857 * that much. It doesn't assume any following bytes
860 if ((*context->current_text & 0xc0) == 0x80) /* not a char start */
864 G_MARKUP_ERROR_BAD_UTF8,
865 _("Invalid UTF-8 encoded text"));
869 /* Initialize context->current_text_end, possibly adjusting
870 * current_text_len, and add any leftover char portion
872 find_current_text_end (context);
874 /* Validate UTF8 (must be done after we find the end, since
875 * we could have a trailing incomplete char)
877 if (!g_utf8_validate (context->current_text,
878 context->current_text_len,
883 p = context->current_text;
884 while (p != context->current_text_end)
891 context->line_number += newlines;
895 G_MARKUP_ERROR_BAD_UTF8,
896 _("Invalid UTF-8 encoded text"));
900 while (context->iter != context->current_text_end)
902 switch (context->state)
905 /* Possible next state: AFTER_OPEN_ANGLE */
907 g_assert (context->tag_stack == NULL);
909 /* whitespace is ignored outside of any elements */
910 skip_spaces (context);
912 if (context->iter != context->current_text_end)
914 if (*context->iter == '<')
916 /* Move after the open angle */
917 advance_char (context);
919 context->state = STATE_AFTER_OPEN_ANGLE;
921 /* this could start a passthrough */
922 context->start = context->iter;
924 /* document is now non-empty */
925 context->document_empty = FALSE;
931 G_MARKUP_ERROR_PARSE,
932 _("Document must begin with an element (e.g. <book>)"));
937 case STATE_AFTER_OPEN_ANGLE:
938 /* Possible next states: INSIDE_OPEN_TAG_NAME,
939 * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
941 if (*context->iter == '?' ||
942 *context->iter == '!')
944 /* include < in the passthrough */
945 const gchar *openangle = "<";
946 add_to_partial (context, openangle, openangle + 1);
947 context->start = context->iter;
948 context->state = STATE_INSIDE_PASSTHROUGH;
950 else if (*context->iter == '/')
953 advance_char (context);
955 context->state = STATE_AFTER_CLOSE_TAG_SLASH;
957 else if (is_name_start_char (g_utf8_get_char (context->iter)))
959 context->state = STATE_INSIDE_OPEN_TAG_NAME;
961 /* start of tag name */
962 context->start = context->iter;
969 G_MARKUP_ERROR_PARSE,
970 _("'%s' is not a valid character following "
971 "a '<' character; it may not begin an "
973 utf8_str (context->iter, buf));
977 /* The AFTER_CLOSE_ANGLE state is actually sort of
978 * broken, because it doesn't correspond to a range
979 * of characters in the input stream as the others do,
980 * and thus makes things harder to conceptualize
982 case STATE_AFTER_CLOSE_ANGLE:
983 /* Possible next states: INSIDE_TEXT, STATE_START */
984 if (context->tag_stack == NULL)
986 context->start = NULL;
987 context->state = STATE_START;
991 context->start = context->iter;
992 context->state = STATE_INSIDE_TEXT;
996 case STATE_AFTER_ELISION_SLASH:
997 /* Possible next state: AFTER_CLOSE_ANGLE */
1000 /* We need to pop the tag stack and call the end_element
1001 * function, since this is the close tag
1003 GError *tmp_error = NULL;
1005 g_assert (context->tag_stack != NULL);
1008 if (context->parser->end_element)
1009 (* context->parser->end_element) (context,
1010 context->tag_stack->data,
1016 mark_error (context, tmp_error);
1017 g_propagate_error (error, tmp_error);
1021 if (*context->iter == '>')
1023 /* move after the close angle */
1024 advance_char (context);
1025 context->state = STATE_AFTER_CLOSE_ANGLE;
1032 G_MARKUP_ERROR_PARSE,
1033 _("Odd character '%s', expected a '>' character "
1034 "to end the start tag of element '%s'"),
1035 utf8_str (context->iter, buf),
1036 current_element (context));
1040 g_free (context->tag_stack->data);
1041 context->tag_stack = g_slist_delete_link (context->tag_stack,
1042 context->tag_stack);
1046 case STATE_INSIDE_OPEN_TAG_NAME:
1047 /* Possible next states: BETWEEN_ATTRIBUTES */
1049 /* if there's a partial chunk then it's the first part of the
1050 * tag name. If there's a context->start then it's the start
1051 * of the tag name in current_text, the partial chunk goes
1052 * before that start though.
1054 advance_to_name_end (context);
1056 if (context->iter == context->current_text_end)
1058 /* The name hasn't necessarily ended. Merge with
1059 * partial chunk, leave state unchanged.
1061 add_to_partial (context, context->start, context->iter);
1065 /* The name has ended. Combine it with the partial chunk
1066 * if any; push it on the stack; enter next state.
1068 add_to_partial (context, context->start, context->iter);
1069 context->tag_stack =
1070 g_slist_prepend (context->tag_stack,
1071 g_string_free (context->partial_chunk,
1074 context->partial_chunk = NULL;
1076 context->state = STATE_BETWEEN_ATTRIBUTES;
1077 context->start = NULL;
1081 case STATE_INSIDE_ATTRIBUTE_NAME:
1082 /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1084 /* read the full name, if we enter the equals sign state
1085 * then add the attribute to the list (without the value),
1086 * otherwise store a partial chunk to be prepended later.
1088 advance_to_name_end (context);
1090 if (context->iter == context->current_text_end)
1092 /* The name hasn't necessarily ended. Merge with
1093 * partial chunk, leave state unchanged.
1095 add_to_partial (context, context->start, context->iter);
1099 /* The name has ended. Combine it with the partial chunk
1100 * if any; push it on the stack; enter next state.
1102 add_to_partial (context, context->start, context->iter);
1104 add_attribute (context, g_string_free (context->partial_chunk, FALSE));
1106 context->partial_chunk = NULL;
1107 context->start = NULL;
1109 if (*context->iter == '=')
1111 advance_char (context);
1112 context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1119 G_MARKUP_ERROR_PARSE,
1120 _("Odd character '%s', expected a '=' after "
1121 "attribute name '%s' of element '%s'"),
1122 utf8_str (context->iter, buf),
1123 current_attribute (context),
1124 current_element (context));
1130 case STATE_BETWEEN_ATTRIBUTES:
1131 /* Possible next states: AFTER_CLOSE_ANGLE,
1132 * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1134 skip_spaces (context);
1136 if (context->iter != context->current_text_end)
1138 if (*context->iter == '/')
1140 advance_char (context);
1141 context->state = STATE_AFTER_ELISION_SLASH;
1143 else if (*context->iter == '>')
1146 advance_char (context);
1147 context->state = STATE_AFTER_CLOSE_ANGLE;
1149 else if (is_name_start_char (g_utf8_get_char (context->iter)))
1151 context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1152 /* start of attribute name */
1153 context->start = context->iter;
1160 G_MARKUP_ERROR_PARSE,
1161 _("Odd character '%s', expected a '>' or '/' "
1162 "character to end the start tag of "
1163 "element '%s', or optionally an attribute; "
1164 "perhaps you used an invalid character in "
1165 "an attribute name"),
1166 utf8_str (context->iter, buf),
1167 current_element (context));
1170 /* If we're done with attributes, invoke
1171 * the start_element callback
1173 if (context->state == STATE_AFTER_ELISION_SLASH ||
1174 context->state == STATE_AFTER_CLOSE_ANGLE)
1176 const gchar *start_name;
1177 /* Ugly, but the current code expects an empty array instead of NULL */
1178 const gchar *empty = NULL;
1179 const gchar **attr_names = ∅
1180 const gchar **attr_values = ∅
1183 /* Call user callback for element start */
1184 start_name = current_element (context);
1186 if (context->cur_attr >= 0)
1188 attr_names = (const gchar**)context->attr_names;
1189 attr_values = (const gchar**)context->attr_values;
1193 if (context->parser->start_element)
1194 (* context->parser->start_element) (context,
1196 (const gchar **)attr_names,
1197 (const gchar **)attr_values,
1201 /* Go ahead and free the attributes. */
1202 for (; context->cur_attr >= 0; context->cur_attr--)
1204 int pos = context->cur_attr;
1205 g_free (context->attr_names[pos]);
1206 g_free (context->attr_values[pos]);
1207 context->attr_names[pos] = context->attr_values[pos] = NULL;
1209 g_assert (context->cur_attr == -1);
1210 g_assert (context->attr_names == NULL ||
1211 context->attr_names[0] == NULL);
1212 g_assert (context->attr_values == NULL ||
1213 context->attr_values[0] == NULL);
1215 if (tmp_error != NULL)
1217 mark_error (context, tmp_error);
1218 g_propagate_error (error, tmp_error);
1224 case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1225 /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
1226 if (*context->iter == '"')
1228 advance_char (context);
1229 context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
1230 context->start = context->iter;
1232 else if (*context->iter == '\'')
1234 advance_char (context);
1235 context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
1236 context->start = context->iter;
1243 G_MARKUP_ERROR_PARSE,
1244 _("Odd character '%s', expected an open quote mark "
1245 "after the equals sign when giving value for "
1246 "attribute '%s' of element '%s'"),
1247 utf8_str (context->iter, buf),
1248 current_attribute (context),
1249 current_element (context));
1253 case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1254 case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1255 /* Possible next states: BETWEEN_ATTRIBUTES */
1259 if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
1270 if (*context->iter == delim)
1273 while (advance_char (context));
1275 if (context->iter == context->current_text_end)
1277 /* The value hasn't necessarily ended. Merge with
1278 * partial chunk, leave state unchanged.
1280 add_to_partial (context, context->start, context->iter);
1284 /* The value has ended at the quote mark. Combine it
1285 * with the partial chunk if any; set it for the current
1288 add_to_partial (context, context->start, context->iter);
1290 g_assert (context->cur_attr >= 0);
1292 if (unescape_text (context,
1293 context->partial_chunk->str,
1294 context->partial_chunk->str +
1295 context->partial_chunk->len,
1296 &context->attr_values[context->cur_attr],
1299 /* success, advance past quote and set state. */
1300 advance_char (context);
1301 context->state = STATE_BETWEEN_ATTRIBUTES;
1302 context->start = NULL;
1305 truncate_partial (context);
1309 case STATE_INSIDE_TEXT:
1310 /* Possible next states: AFTER_OPEN_ANGLE */
1313 if (*context->iter == '<')
1316 while (advance_char (context));
1318 /* The text hasn't necessarily ended. Merge with
1319 * partial chunk, leave state unchanged.
1322 add_to_partial (context, context->start, context->iter);
1324 if (context->iter != context->current_text_end)
1326 gchar *unescaped = NULL;
1328 /* The text has ended at the open angle. Call the text
1332 if (unescape_text (context,
1333 context->partial_chunk->str,
1334 context->partial_chunk->str +
1335 context->partial_chunk->len,
1339 GError *tmp_error = NULL;
1341 if (context->parser->text)
1342 (*context->parser->text) (context,
1350 if (tmp_error == NULL)
1352 /* advance past open angle and set state. */
1353 advance_char (context);
1354 context->state = STATE_AFTER_OPEN_ANGLE;
1355 /* could begin a passthrough */
1356 context->start = context->iter;
1360 mark_error (context, tmp_error);
1361 g_propagate_error (error, tmp_error);
1365 truncate_partial (context);
1369 case STATE_AFTER_CLOSE_TAG_SLASH:
1370 /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1371 if (is_name_start_char (g_utf8_get_char (context->iter)))
1373 context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1375 /* start of tag name */
1376 context->start = context->iter;
1383 G_MARKUP_ERROR_PARSE,
1384 _("'%s' is not a valid character following "
1385 "the characters '</'; '%s' may not begin an "
1387 utf8_str (context->iter, buf),
1388 utf8_str (context->iter, buf));
1392 case STATE_INSIDE_CLOSE_TAG_NAME:
1393 /* Possible next state: AFTER_CLOSE_ANGLE */
1394 advance_to_name_end (context);
1396 if (context->iter == context->current_text_end)
1398 /* The name hasn't necessarily ended. Merge with
1399 * partial chunk, leave state unchanged.
1401 add_to_partial (context, context->start, context->iter);
1405 /* The name has ended. Combine it with the partial chunk
1406 * if any; check that it matches stack top and pop
1407 * stack; invoke proper callback; enter next state.
1411 add_to_partial (context, context->start, context->iter);
1413 close_name = g_string_free (context->partial_chunk, FALSE);
1414 context->partial_chunk = NULL;
1416 if (*context->iter != '>')
1421 G_MARKUP_ERROR_PARSE,
1422 _("'%s' is not a valid character following "
1423 "the close element name '%s'; the allowed "
1424 "character is '>'"),
1425 utf8_str (context->iter, buf),
1428 else if (context->tag_stack == NULL)
1432 G_MARKUP_ERROR_PARSE,
1433 _("Element '%s' was closed, no element "
1434 "is currently open"),
1437 else if (strcmp (close_name, current_element (context)) != 0)
1441 G_MARKUP_ERROR_PARSE,
1442 _("Element '%s' was closed, but the currently "
1443 "open element is '%s'"),
1445 current_element (context));
1450 advance_char (context);
1451 context->state = STATE_AFTER_CLOSE_ANGLE;
1452 context->start = NULL;
1454 /* call the end_element callback */
1456 if (context->parser->end_element)
1457 (* context->parser->end_element) (context,
1463 /* Pop the tag stack */
1464 g_free (context->tag_stack->data);
1465 context->tag_stack = g_slist_delete_link (context->tag_stack,
1466 context->tag_stack);
1470 mark_error (context, tmp_error);
1471 g_propagate_error (error, tmp_error);
1475 g_free (close_name);
1479 case STATE_INSIDE_PASSTHROUGH:
1480 /* Possible next state: AFTER_CLOSE_ANGLE */
1483 if (*context->iter == '>')
1486 while (advance_char (context));
1488 if (context->iter == context->current_text_end)
1490 /* The passthrough hasn't necessarily ended. Merge with
1491 * partial chunk, leave state unchanged.
1493 add_to_partial (context, context->start, context->iter);
1497 /* The passthrough has ended at the close angle. Combine
1498 * it with the partial chunk if any. Call the passthrough
1499 * callback. Note that the open/close angles are
1500 * included in the text of the passthrough.
1502 GError *tmp_error = NULL;
1504 advance_char (context); /* advance past close angle */
1505 add_to_partial (context, context->start, context->iter);
1507 if (context->parser->passthrough)
1508 (*context->parser->passthrough) (context,
1509 context->partial_chunk->str,
1510 context->partial_chunk->len,
1514 truncate_partial (context);
1516 if (tmp_error == NULL)
1518 context->state = STATE_AFTER_CLOSE_ANGLE;
1519 context->start = context->iter; /* could begin text */
1523 mark_error (context, tmp_error);
1524 g_propagate_error (error, tmp_error);
1534 g_assert_not_reached ();
1540 context->parsing = FALSE;
1542 return context->state != STATE_ERROR;
1546 * g_markup_parse_context_end_parse:
1547 * @context: a #GMarkupParseContext
1548 * @error: return location for a #GError
1550 * Signals to the #GMarkupParseContext that all data has been
1551 * fed into the parse context with g_markup_parse_context_parse().
1552 * This function reports an error if the document isn't complete,
1553 * for example if elements are still open.
1555 * Return value: %TRUE on success, %FALSE if an error was set
1558 g_markup_parse_context_end_parse (GMarkupParseContext *context,
1561 g_return_val_if_fail (context != NULL, FALSE);
1562 g_return_val_if_fail (!context->parsing, FALSE);
1563 g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1565 if (context->partial_chunk != NULL)
1567 g_string_free (context->partial_chunk, TRUE);
1568 context->partial_chunk = NULL;
1571 if (context->document_empty)
1573 set_error (context, error, G_MARKUP_ERROR_EMPTY,
1574 _("Document was empty or contained only whitespace"));
1578 context->parsing = TRUE;
1580 switch (context->state)
1586 case STATE_AFTER_OPEN_ANGLE:
1587 set_error (context, error, G_MARKUP_ERROR_PARSE,
1588 _("Document ended unexpectedly just after an open angle bracket '<'"));
1591 case STATE_AFTER_CLOSE_ANGLE:
1592 if (context->tag_stack != NULL)
1594 /* Error message the same as for INSIDE_TEXT */
1595 set_error (context, error, G_MARKUP_ERROR_PARSE,
1596 _("Document ended unexpectedly with elements still open - "
1597 "'%s' was the last element opened"),
1598 current_element (context));
1602 case STATE_AFTER_ELISION_SLASH:
1603 set_error (context, error, G_MARKUP_ERROR_PARSE,
1604 _("Document ended unexpectedly, expected to see a close angle "
1605 "bracket ending the tag <%s/>"), current_element (context));
1608 case STATE_INSIDE_OPEN_TAG_NAME:
1609 set_error (context, error, G_MARKUP_ERROR_PARSE,
1610 _("Document ended unexpectedly inside an element name"));
1613 case STATE_INSIDE_ATTRIBUTE_NAME:
1614 set_error (context, error, G_MARKUP_ERROR_PARSE,
1615 _("Document ended unexpectedly inside an attribute name"));
1618 case STATE_BETWEEN_ATTRIBUTES:
1619 set_error (context, error, G_MARKUP_ERROR_PARSE,
1620 _("Document ended unexpectedly inside an element-opening "
1624 case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1625 set_error (context, error, G_MARKUP_ERROR_PARSE,
1626 _("Document ended unexpectedly after the equals sign "
1627 "following an attribute name; no attribute value"));
1630 case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1631 case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1632 set_error (context, error, G_MARKUP_ERROR_PARSE,
1633 _("Document ended unexpectedly while inside an attribute "
1637 case STATE_INSIDE_TEXT:
1638 g_assert (context->tag_stack != NULL);
1639 set_error (context, error, G_MARKUP_ERROR_PARSE,
1640 _("Document ended unexpectedly with elements still open - "
1641 "'%s' was the last element opened"),
1642 current_element (context));
1645 case STATE_AFTER_CLOSE_TAG_SLASH:
1646 case STATE_INSIDE_CLOSE_TAG_NAME:
1647 set_error (context, error, G_MARKUP_ERROR_PARSE,
1648 _("Document ended unexpectedly inside the close tag for "
1649 "element '%s'"), current_element);
1652 case STATE_INSIDE_PASSTHROUGH:
1653 set_error (context, error, G_MARKUP_ERROR_PARSE,
1654 _("Document ended unexpectedly inside a comment or "
1655 "processing instruction"));
1660 g_assert_not_reached ();
1664 context->parsing = FALSE;
1666 return context->state != STATE_ERROR;
1670 * g_markup_parse_context_get_position:
1671 * @context: a #GMarkupParseContext
1672 * @line_number: return location for a line number, or %NULL
1673 * @char_number: return location for a char-on-line number, or %NULL
1675 * Retrieves the current line number and the number of the character on
1676 * that line. Intended for use in error messages; there are no strict
1677 * semantics for what constitutes the "current" line number other than
1678 * "the best number we could come up with for error messages."
1682 g_markup_parse_context_get_position (GMarkupParseContext *context,
1686 g_return_if_fail (context != NULL);
1689 *line_number = context->line_number;
1692 *char_number = context->char_number;
1696 append_escaped_text (GString *str,
1704 end = text + length;
1709 next = g_utf8_next_char (p);
1714 g_string_append (str, "&");
1718 g_string_append (str, "<");
1722 g_string_append (str, ">");
1726 g_string_append (str, "'");
1730 g_string_append (str, """);
1734 g_string_append_len (str, p, next - p);
1743 * g_markup_escape_text:
1744 * @text: some valid UTF-8 text
1745 * @length: length of @text in bytes
1747 * Escapes text so that the markup parser will parse it verbatim.
1748 * Less than, greater than, ampersand, etc. are replaced with the
1749 * corresponding entities. This function would typically be used
1750 * when writing out a file to be parsed with the markup parser.
1752 * Return value: escaped text
1755 g_markup_escape_text (const gchar *text,
1760 g_return_val_if_fail (text != NULL, NULL);
1763 length = strlen (text);
1765 str = g_string_new ("");
1766 append_escaped_text (str, text, length);
1768 return g_string_free (str, FALSE);