1 /* gmarkup.c - Simple XML-like parser
3 * Copyright 2000, 2003 Red Hat, Inc.
5 * GLib is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU Lesser General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
10 * GLib is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with GLib; see the file COPYING.LIB. If not,
17 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
// Creates the quark for the static string g-markup-error-quark and
// stores it in the function-local static error_quark, which starts
// out as 0.
// NOTE(review): the guard around the assignment and the return
// statement are not visible in this listing; presumably the quark is
// created once and then returned on every call - confirm.
34 g_markup_error_quark (void)
36 static GQuark error_quark = 0;
39 error_quark = g_quark_from_static_string ("g-markup-error-quark");
// Enumerators naming the states of the parser state machine; the
// state field of the parse context holds one of them.
// NOTE(review): STATE_START, STATE_INSIDE_TEXT and STATE_ERROR are
// used elsewhere in this file but their enumerator lines, and the
// enum header itself, are not visible in this listing.
// Entered once a < has been consumed.
47 STATE_AFTER_OPEN_ANGLE,
48 STATE_AFTER_CLOSE_ANGLE,
49 STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
50 STATE_INSIDE_OPEN_TAG_NAME,
51 STATE_INSIDE_ATTRIBUTE_NAME,
52 STATE_BETWEEN_ATTRIBUTES,
53 STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
// SQ and DQ: the value was opened with a single or a double quote.
54 STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
55 STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
57 STATE_AFTER_CLOSE_TAG_SLASH,
58 STATE_INSIDE_CLOSE_TAG_NAME,
// Entered when the character after < is ? or ! (see the parse loop).
59 STATE_INSIDE_PASSTHROUGH,
// State that one parse context carries across calls to
// g_markup_parse_context_parse().
// NOTE(review): several members used by the code (for example
// user_data, line_number, char_number, tag_stack, the attribute
// arrays, start, iter, parsing and balance) are not visible in this
// listing.
63 struct _GMarkupParseContext
// Callback table and flags handed to g_markup_parse_context_new().
65 const GMarkupParser *parser;
67 GMarkupParseFlags flags;
// When not NULL, called with user_data as the context is freed.
73 GDestroyNotify dnotify;
75 /* A piece of character data or an element that
76 * hasn't "ended" yet so we haven't yet called
77 * the callback for it.
79 GString *partial_chunk;
// Current state of the parser state machine.
81 GMarkupParseState state;
// The chunk being parsed: its start, its length in bytes, and the end
// of its last complete character (set by find_current_text_end).
88 const gchar *current_text;
89 gssize current_text_len;
90 const gchar *current_text_end;
// Bytes of an incomplete trailing UTF-8 character, kept until a later
// chunk supplies the rest.
92 GString *leftover_char_portion;
94 /* used to save the start of the last interesting thingy */
// Starts out TRUE; cleared by the parse loop when it sees the first <.
99 guint document_empty : 1;
// Implementation notes (the API description follows).
// Allocates the context with g_new and sets every field to its
// starting value: position counters at 1, state STATE_START, no tag
// stack, no attribute arrays (cur_attr is -1 while no attribute is
// stored), no current chunk, no pending partial data, document_empty
// TRUE and parsing FALSE.
// A NULL parser is rejected and NULL is returned.
// NOTE(review): the user_data parameter line and the final return of
// the new context are not visible in this listing.
105 * g_markup_parse_context_new:
106 * @parser: a #GMarkupParser
107 * @flags: one or more #GMarkupParseFlags
108 * @user_data: user data to pass to #GMarkupParser functions
109 * @user_data_dnotify: user data destroy notifier called when the parse context is freed
111 * Creates a new parse context. A parse context is used to parse
112 * marked-up documents. You can feed any number of documents into
113 * a context, as long as no errors occur; once an error occurs,
114 * the parse context can't continue to parse text (you have to free it
115 * and create a new parse context).
117 * Return value: a new #GMarkupParseContext
119 GMarkupParseContext *
120 g_markup_parse_context_new (const GMarkupParser *parser,
121 GMarkupParseFlags flags,
123 GDestroyNotify user_data_dnotify)
125 GMarkupParseContext *context;
127 g_return_val_if_fail (parser != NULL, NULL);
129 context = g_new (GMarkupParseContext, 1);
131 context->parser = parser;
132 context->flags = flags;
133 context->user_data = user_data;
134 context->dnotify = user_data_dnotify;
136 context->line_number = 1;
137 context->char_number = 1;
139 context->partial_chunk = NULL;
141 context->state = STATE_START;
142 context->tag_stack = NULL;
143 context->attr_names = NULL;
144 context->attr_values = NULL;
145 context->cur_attr = -1;
146 context->alloc_attrs = 0;
148 context->current_text = NULL;
149 context->current_text_len = -1;
150 context->current_text_end = NULL;
151 context->leftover_char_portion = NULL;
153 context->start = NULL;
154 context->iter = NULL;
156 context->document_empty = TRUE;
157 context->parsing = FALSE;
159 context->balance = 0;
// Implementation notes (the API description follows).
// Refuses to run on a NULL context or while the parsing flag is set.
// Otherwise runs the destroy notifier on user_data when one was given,
// then frees the two attribute string arrays, every name on the tag
// stack together with the list itself, and the partial_chunk and
// leftover_char_portion strings when they exist.
// NOTE(review): the release of the context structure itself is not
// visible in this listing - confirm it follows.
165 * g_markup_parse_context_free:
166 * @context: a #GMarkupParseContext
168 * Frees a #GMarkupParseContext. Can't be called from inside
169 * one of the #GMarkupParser functions.
173 g_markup_parse_context_free (GMarkupParseContext *context)
175 g_return_if_fail (context != NULL);
176 g_return_if_fail (!context->parsing);
178 if (context->dnotify)
179 (* context->dnotify) (context->user_data);
181 g_strfreev (context->attr_names);
182 g_strfreev (context->attr_values);
184 g_slist_foreach (context->tag_stack, (GFunc)g_free, NULL);
185 g_slist_free (context->tag_stack);
187 if (context->partial_chunk)
188 g_string_free (context->partial_chunk, TRUE);
190 if (context->leftover_char_portion)
191 g_string_free (context->leftover_char_portion, TRUE);
// Puts the context into STATE_ERROR and, when the parser table has an
// error callback, hands it the error together with user_data.
// NOTE(review): the declaration of the error parameter is not visible
// in this listing.
197 mark_error (GMarkupParseContext *context,
200 context->state = STATE_ERROR;
202 if (context->parser->error)
203 (*context->parser->error) (context, error, context->user_data);
// Builds an error in the G_MARKUP_ERROR domain from a printf-style
// format: the caller message is formatted with g_strdup_vprintf, then
// wrapped in a message that starts with the current line and char
// number of the context. The context is marked as failed through
// mark_error() and the error is propagated to the caller location.
// NOTE(review): the remaining parameters, the error-code argument and
// the cleanup of the formatted string are not visible in this listing.
207 set_error (GMarkupParseContext *context,
217 va_start (args, format);
218 s = g_strdup_vprintf (format, args);
221 tmp_error = g_error_new (G_MARKUP_ERROR,
223 _("Error on line %d char %d: %s"),
224 context->line_number,
225 context->char_number,
230 mark_error (context, tmp_error);
232 g_propagate_error (error, tmp_error);
// Predicate on a decoded Unicode code point; the parser uses it to
// decide whether a character may begin an element, attribute or
// entity name. Alphabetic characters (g_unichar_isalpha) qualify.
// NOTE(review): the remaining alternatives of the condition and the
// return statements are not visible in this listing.
236 is_name_start_char (gunichar c)
238 if (g_unichar_isalpha (c) ||
// Predicate on a decoded Unicode code point; advance_to_name_end()
// uses it to find where a name stops. Alphanumeric characters
// (g_unichar_isalnum) qualify.
// NOTE(review): the remaining alternatives of the condition and the
// return statements are not visible in this listing.
247 is_name_char (gunichar c)
249 if (g_unichar_isalnum (c) ||
// Encodes the code point c as UTF-8 into the caller-supplied buffer
// buf by way of g_unichar_to_utf8().
// NOTE(review): the buf parameter line and the return statement are
// not visible; callers pass the result straight to string functions,
// so buf is presumably NUL-terminated here and returned - confirm.
261 char_str (gunichar c,
265 g_unichar_to_utf8 (c, buf);
// Decodes the first character found at utf8 and re-encodes just that
// character into buf through char_str(). The parser uses the result to
// quote the offending character in error messages.
// NOTE(review): the buf parameter line and the return statement are
// not visible in this listing.
270 utf8_str (const gchar *utf8,
273 char_str (g_utf8_get_char (utf8), buf);
// Variant of set_error() for problems found while unescaping text.
// It walks from the problem position to remaining_text_end, counting
// into remaining_newlines, and reports context->line_number minus
// that count, so the message names the line of the problem instead of
// the line the parser has already advanced to.
// The caller message is formatted with g_strdup_vprintf; the context
// is marked as failed through mark_error() and the error is
// propagated.
// NOTE(review): the test that decides when remaining_newlines is
// incremented is not visible; presumably one per newline - confirm.
278 set_unescape_error (GMarkupParseContext *context,
280 const gchar *remaining_text,
281 const gchar *remaining_text_end,
289 gint remaining_newlines;
292 remaining_newlines = 0;
294 while (p != remaining_text_end)
297 ++remaining_newlines;
301 va_start (args, format);
302 s = g_strdup_vprintf (format, args);
305 tmp_error = g_error_new (G_MARKUP_ERROR,
307 _("Error on line %d: %s"),
308 context->line_number - remaining_newlines,
313 mark_error (context, tmp_error);
315 g_propagate_error (error, tmp_error);
// States of the small state machine inside unescape_text().
// NOTE(review): USTATE_INSIDE_TEXT is used there as well, but its
// enumerator line and the enum header are not visible in this listing.
321 USTATE_AFTER_AMPERSAND,
322 USTATE_INSIDE_ENTITY_NAME,
323 USTATE_AFTER_CHARREF_HASH
// Expands entity references and character references found in the
// range from text to text_end into a newly built GString.
//
// On success the character data is handed to the caller through
// *unescaped. If any step called set_unescape_error(), the context is
// in STATE_ERROR and the partial result is freed instead. Callers use
// the return value as a success flag; the return statements themselves
// are not visible in this listing.
//
// Behaviour visible in the code below:
//  - While the context is inside an attribute value, a tab or newline
//    in the text is written as a single space.
//  - The five entity names lt, gt, amp, quot and apos are replaced by
//    the character they name; any other name is reported as unknown.
//  - A character reference is converted with strtoul (base 16 or 10)
//    and accepted only inside the listed code point ranges.
//  - Input that ends in the middle of a reference is reported as an
//    unfinished entity or character reference.
//
// NOTE(review): the entity-name loop calls is_name_char (*p) with a
// raw byte, whereas the check right after the ampersand decodes the
// character with g_utf8_get_char (p) first. is_name_char takes a
// gunichar, so a non-ASCII byte is classified wrongly - confirm and
// decode before the call.
// NOTE(review): the strtoul result is judged by errno != 0, but no
// statement clearing errno beforehand is visible here - confirm.
// NOTE(review): the entity name is collected in a buffer of
// MAX_ENT_LEN + 1 bytes; the copy into it is not visible, confirm it
// is bounded.
// NOTE(review): the message literal that lists the valid entities has
// unbalanced quotes in this listing, and two other messages end in a
// bare ampersand; the entity escapes were presumably lost in
// extraction - confirm against the upstream text.
327 unescape_text (GMarkupParseContext *context,
329 const gchar *text_end,
333 #define MAX_ENT_LEN 5
338 gboolean normalize_attribute;
340 str = g_string_new (NULL);
342 if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ ||
343 context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
344 normalize_attribute = TRUE;
346 normalize_attribute = FALSE;
348 state = USTATE_INSIDE_TEXT;
351 while (p != text_end && context->state != STATE_ERROR)
353 g_assert (p < text_end);
357 case USTATE_INSIDE_TEXT:
359 while (p != text_end && *p != '&')
361 if ((*p == '\t' || *p == '\n') && normalize_attribute)
363 g_string_append_len (str, start, p - start);
364 g_string_append_c (str, ' ');
365 p = g_utf8_next_char (p);
370 g_string_append_len (str, start, p - start);
371 g_string_append_c (str, normalize_attribute ? ' ' : '\n');
372 p = g_utf8_next_char (p);
374 p = g_utf8_next_char (p);
378 p = g_utf8_next_char (p);
383 g_string_append_len (str, start, p - start);
388 if (p != text_end && *p == '&')
390 p = g_utf8_next_char (p);
391 state = USTATE_AFTER_AMPERSAND;
396 case USTATE_AFTER_AMPERSAND:
400 p = g_utf8_next_char (p);
403 state = USTATE_AFTER_CHARREF_HASH;
405 else if (!is_name_start_char (g_utf8_get_char (p)))
409 set_unescape_error (context, error,
411 G_MARKUP_ERROR_PARSE,
412 _("Empty entity '&;' seen; valid "
413 "entities are: & " < > '"));
419 set_unescape_error (context, error,
421 G_MARKUP_ERROR_PARSE,
422 _("Character '%s' is not valid at "
423 "the start of an entity name; "
424 "the & character begins an entity; "
425 "if this ampersand isn't supposed "
426 "to be an entity, escape it as "
434 state = USTATE_INSIDE_ENTITY_NAME;
440 case USTATE_INSIDE_ENTITY_NAME:
442 gchar buf[MAX_ENT_LEN+1] = {
443 '\0', '\0', '\0', '\0', '\0', '\0'
447 while (p != text_end)
451 else if (!is_name_char (*p))
455 set_unescape_error (context, error,
457 G_MARKUP_ERROR_PARSE,
458 _("Character '%s' is not valid "
459 "inside an entity name"),
464 p = g_utf8_next_char (p);
467 if (context->state != STATE_ERROR)
482 /* move to after semicolon */
483 p = g_utf8_next_char (p);
485 state = USTATE_INSIDE_TEXT;
487 if (strcmp (buf, "lt") == 0)
488 g_string_append_c (str, '<');
489 else if (strcmp (buf, "gt") == 0)
490 g_string_append_c (str, '>');
491 else if (strcmp (buf, "amp") == 0)
492 g_string_append_c (str, '&');
493 else if (strcmp (buf, "quot") == 0)
494 g_string_append_c (str, '"');
495 else if (strcmp (buf, "apos") == 0)
496 g_string_append_c (str, '\'');
499 set_unescape_error (context, error,
501 G_MARKUP_ERROR_PARSE,
502 _("Entity name '%s' is not known"),
508 set_unescape_error (context, error,
509 /* give line number of the & */
511 G_MARKUP_ERROR_PARSE,
512 _("Entity did not end with a semicolon; "
513 "most likely you used an ampersand "
514 "character without intending to start "
515 "an entity - escape ampersand as &"));
521 case USTATE_AFTER_CHARREF_HASH:
523 gboolean is_hex = FALSE;
527 p = g_utf8_next_char (p);
531 while (p != text_end && *p != ';')
532 p = g_utf8_next_char (p);
536 g_assert (*p == ';');
538 /* digit is between start and p */
542 gchar *digit = g_strndup (start, p - start);
545 gchar *digit_end = digit + (p - start);
549 l = strtoul (digit, &end, 16);
551 l = strtoul (digit, &end, 10);
553 if (end != digit_end || errno != 0)
555 set_unescape_error (context, error,
557 G_MARKUP_ERROR_PARSE,
558 _("Failed to parse '%s', which "
559 "should have been a digit "
560 "inside a character reference "
561 "(ê for example) - perhaps "
562 "the digit is too large"),
567 /* characters XML permits */
571 (l >= 0x20 && l <= 0xD7FF) ||
572 (l >= 0xE000 && l <= 0xFFFD) ||
573 (l >= 0x10000 && l <= 0x10FFFF))
576 g_string_append (str, char_str (l, buf));
580 set_unescape_error (context, error,
582 G_MARKUP_ERROR_PARSE,
583 _("Character reference '%s' does not encode a permitted character"),
590 /* Move to next state */
591 p = g_utf8_next_char (p); /* past semicolon */
593 state = USTATE_INSIDE_TEXT;
597 set_unescape_error (context, error,
599 G_MARKUP_ERROR_PARSE,
600 _("Empty character reference; "
601 "should include a digit such as "
607 set_unescape_error (context, error,
609 G_MARKUP_ERROR_PARSE,
610 _("Character reference did not end with a "
612 "most likely you used an ampersand "
613 "character without intending to start "
614 "an entity - escape ampersand as &"));
620 g_assert_not_reached ();
625 if (context->state != STATE_ERROR)
629 case USTATE_INSIDE_TEXT:
631 case USTATE_AFTER_AMPERSAND:
632 case USTATE_INSIDE_ENTITY_NAME:
633 set_unescape_error (context, error,
635 G_MARKUP_ERROR_PARSE,
636 _("Unfinished entity reference"));
638 case USTATE_AFTER_CHARREF_HASH:
639 set_unescape_error (context, error,
641 G_MARKUP_ERROR_PARSE,
642 _("Unfinished character reference"));
647 if (context->state == STATE_ERROR)
649 g_string_free (str, TRUE);
655 *unescaped = g_string_free (str, FALSE);
663 advance_char (GMarkupParseContext *context)
666 context->iter = g_utf8_next_char (context->iter);
667 context->char_number += 1;
668 if (*context->iter == '\n')
670 context->line_number += 1;
671 context->char_number = 1;
674 return context->iter != context->current_text_end;
// NOTE(review): the signature line is not visible in this listing;
// this is presumably the body of xml_isspace(), which skip_spaces()
// calls - confirm.
// True for the four whitespace bytes: space, tab, newline and
// carriage return.
680 return c == ' ' || c == '\t' || c == '\n' || c == '\r';
// Moves context->iter past a run of whitespace. The walk stops at the
// first byte for which xml_isspace() is false, or when advance_char()
// reports that the end of the chunk was reached.
// The byte at iter is read before the first advance, so iter must
// point inside the chunk on entry.
684 skip_spaces (GMarkupParseContext *context)
688 if (!xml_isspace (*context->iter))
691 while (advance_char (context));
// Moves context->iter to the end of a name. The walk stops at the
// first decoded character for which is_name_char() is false, or when
// advance_char() reports that the end of the chunk was reached.
// The character at iter is decoded before the first advance, so iter
// must point inside the chunk on entry.
695 advance_to_name_end (GMarkupParseContext *context)
699 if (!is_name_char (g_utf8_get_char (context->iter)))
702 while (advance_char (context));
// Appends the bytes from text_start up to, but not including, text_end
// to context->partial_chunk. The GString is created on first use, so
// partial_chunk is never NULL afterwards, even for an empty range.
706 add_to_partial (GMarkupParseContext *context,
707 const gchar *text_start,
708 const gchar *text_end)
710 if (context->partial_chunk == NULL)
711 context->partial_chunk = g_string_new (NULL);
713 if (text_start != text_end)
714 g_string_append_len (context->partial_chunk, text_start,
715 text_end - text_start);
717 /* Invariant here that partial_chunk exists */
// Resets partial_chunk to length 0 when it exists; the GString object
// is kept so that later add_to_partial() calls can append to it again.
721 truncate_partial (GMarkupParseContext *context)
723 if (context->partial_chunk != NULL)
725 context->partial_chunk = g_string_truncate (context->partial_chunk, 0);
// Name of the innermost open element: the data stored in the head
// link of tag_stack. There is no NULL check, so the stack must be
// non-empty when this is called.
730 current_element (GMarkupParseContext *context)
732 return context->tag_stack->data;
// Name of the attribute at index cur_attr in attr_names. Asserts that
// cur_attr is not negative, that is, that an attribute is stored.
736 current_attribute (GMarkupParseContext *context)
738 g_assert (context->cur_attr >= 0);
739 return context->attr_names[context->cur_attr];
// Sets context->current_text_end for the chunk described by
// current_text and current_text_len (which must be positive).
//
// The chunk is walked with g_utf8_find_next_char() to locate the start
// of its last character. If that character is complete, the end is
// simply current_text + current_text_len. Otherwise the trailing bytes
// are copied into a new leftover_char_portion, current_text_len is
// reduced by their count and current_text_end points at their first
// byte, so the main loop never sees a partial character.
// NOTE(review): the comparison that tells the two cases apart is not
// visible in this listing.
743 find_current_text_end (GMarkupParseContext *context)
745 /* This function must be safe (non-segfaulting) on invalid UTF8 */
746 const gchar *end = context->current_text + context->current_text_len;
750 g_assert (context->current_text_len > 0);
752 p = context->current_text;
753 next = g_utf8_find_next_char (p, end);
755 while (next && *next)
760 next = g_utf8_find_next_char (p, end);
763 /* p is now the start of the last character or character portion. */
765 next = g_utf8_next_char (p); /* this only touches *p, nothing beyond */
769 /* whole character */
770 context->current_text_end = end;
775 context->leftover_char_portion = g_string_new_len (p, end - p);
776 context->current_text_len -= (end - p);
777 context->current_text_end = p;
// Stores name as the entry at index cur_attr of attr_names, with a
// NULL placeholder in the matching attr_values slot, and writes NULL
// into the slot after it in both arrays so they stay NULL-terminated.
// Both arrays grow by 5 slots whenever cur_attr + 2 would reach
// alloc_attrs. The name pointer is stored as given, not copied; the
// parse loop later releases it with g_free.
// NOTE(review): no statement advancing cur_attr is visible between the
// growth check and the stores. cur_attr starts at -1, so confirm that
// it is incremented before the first store.
783 add_attribute (GMarkupParseContext *context, char *name)
785 if (context->cur_attr + 2 >= context->alloc_attrs)
787 context->alloc_attrs += 5; /* silly magic number */
788 context->attr_names = g_realloc (context->attr_names, sizeof(char*)*context->alloc_attrs);
789 context->attr_values = g_realloc (context->attr_values, sizeof(char*)*context->alloc_attrs);
792 context->attr_names[context->cur_attr] = name;
793 context->attr_values[context->cur_attr] = NULL;
794 context->attr_names[context->cur_attr+1] = NULL;
795 context->attr_values[context->cur_attr+1] = NULL;
// Implementation notes (the API description follows).
//
// Flow of one call, as far as it is visible in the code below:
//  1. Argument and state guards: a NULL context or text, a context
//     already in STATE_ERROR, or a call made while the parsing flag is
//     set is rejected with FALSE.
//  2. If an earlier chunk ended in the middle of a UTF-8 character,
//     the bytes saved in leftover_char_portion are completed from the
//     head of this chunk and parsed through a recursive call; the
//     parsing flag is cleared around that call to get past the guard.
//     If the character is still incomplete the new bytes are appended,
//     and a portion longer than 7 bytes is reported as invalid UTF-8.
//  3. The rest of the chunk must start on a character boundary.
//     find_current_text_end() trims a trailing partial character and
//     g_utf8_validate() checks what remains.
//  4. The loop runs the state machine on context->state until
//     context->iter reaches context->current_text_end. Names, values,
//     text and passthrough content that straddle a chunk boundary are
//     accumulated in partial_chunk through add_to_partial().
//  5. The parsing flag is cleared and the result is
//     context->state != STATE_ERROR.
//
// The callbacks in context->parser (start_element, end_element, text,
// passthrough) are invoked only when the table entry is not NULL. An
// error left in tmp_error afterwards is handed to mark_error() and
// propagated to the caller.
//
// NOTE(review): in the STATE_BETWEEN_ATTRIBUTES case the initializers
// of the locals attr_names and attr_values read as an empty-set symbol
// in this listing; presumably they take the address of the local
// variable named empty - confirm against upstream.
799 * g_markup_parse_context_parse:
800 * @context: a #GMarkupParseContext
801 * @text: chunk of text to parse
802 * @text_len: length of @text in bytes
803 * @error: return location for a #GError
805 * Feed some data to the #GMarkupParseContext. The data need not
806 * be valid UTF-8; an error will be signaled if it's invalid.
807 * The data need not be an entire document; you can feed a document
808 * into the parser incrementally, via multiple calls to this function.
809 * Typically, as you receive data from a network connection or file,
810 * you feed each received chunk of data into this function, aborting
811 * the process if an error occurs. Once an error is reported, no further
812 * data may be fed to the #GMarkupParseContext; all errors are fatal.
814 * Return value: %FALSE if an error occurred, %TRUE on success
817 g_markup_parse_context_parse (GMarkupParseContext *context,
822 const gchar *first_invalid;
824 g_return_val_if_fail (context != NULL, FALSE);
825 g_return_val_if_fail (text != NULL, FALSE);
826 g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
827 g_return_val_if_fail (!context->parsing, FALSE);
830 text_len = strlen (text);
835 context->parsing = TRUE;
837 if (context->leftover_char_portion)
839 const gchar *first_char;
841 if ((*text & 0xc0) != 0x80)
844 first_char = g_utf8_find_next_char (text, text + text_len);
848 /* leftover_char_portion was completed. Parse it. */
849 GString *portion = context->leftover_char_portion;
851 g_string_append_len (context->leftover_char_portion,
852 text, first_char - text);
854 /* hacks to allow recursion */
855 context->parsing = FALSE;
856 context->leftover_char_portion = NULL;
858 if (!g_markup_parse_context_parse (context,
859 portion->str, portion->len,
862 g_assert (context->state == STATE_ERROR);
865 g_string_free (portion, TRUE);
866 context->parsing = TRUE;
868 /* Skip the fraction of char that was in this text */
869 text_len -= (first_char - text);
874 /* another little chunk of the leftover char; geez
875 * someone is inefficient.
877 g_string_append_len (context->leftover_char_portion,
880 if (context->leftover_char_portion->len > 7)
882 /* The leftover char portion is too big to be
887 G_MARKUP_ERROR_BAD_UTF8,
888 _("Invalid UTF-8 encoded text"));
895 context->current_text = text;
896 context->current_text_len = text_len;
897 context->iter = context->current_text;
898 context->start = context->iter;
900 /* Nothing left after finishing the leftover char, or nothing
901 * passed in to begin with.
903 if (context->current_text_len == 0)
906 /* find_current_text_end () assumes the string starts at
907 * a character start, so we need to validate at least
908 * that much. It doesn't assume any following bytes
911 if ((*context->current_text & 0xc0) == 0x80) /* not a char start */
915 G_MARKUP_ERROR_BAD_UTF8,
916 _("Invalid UTF-8 encoded text"));
920 /* Initialize context->current_text_end, possibly adjusting
921 * current_text_len, and add any leftover char portion
923 find_current_text_end (context);
925 /* Validate UTF8 (must be done after we find the end, since
926 * we could have a trailing incomplete char)
928 if (!g_utf8_validate (context->current_text,
929 context->current_text_len,
934 p = context->current_text;
935 while (p != context->current_text_end)
942 context->line_number += newlines;
946 G_MARKUP_ERROR_BAD_UTF8,
947 _("Invalid UTF-8 encoded text"));
951 while (context->iter != context->current_text_end)
953 switch (context->state)
956 /* Possible next state: AFTER_OPEN_ANGLE */
958 g_assert (context->tag_stack == NULL);
960 /* whitespace is ignored outside of any elements */
961 skip_spaces (context);
963 if (context->iter != context->current_text_end)
965 if (*context->iter == '<')
967 /* Move after the open angle */
968 advance_char (context);
970 context->state = STATE_AFTER_OPEN_ANGLE;
972 /* this could start a passthrough */
973 context->start = context->iter;
975 /* document is now non-empty */
976 context->document_empty = FALSE;
982 G_MARKUP_ERROR_PARSE,
983 _("Document must begin with an element (e.g. <book>)"));
988 case STATE_AFTER_OPEN_ANGLE:
989 /* Possible next states: INSIDE_OPEN_TAG_NAME,
990 * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
992 if (*context->iter == '?' ||
993 *context->iter == '!')
995 /* include < in the passthrough */
996 const gchar *openangle = "<";
997 add_to_partial (context, openangle, openangle + 1);
998 context->start = context->iter;
999 context->balance = 1;
1000 context->state = STATE_INSIDE_PASSTHROUGH;
1002 else if (*context->iter == '/')
1005 advance_char (context);
1007 context->state = STATE_AFTER_CLOSE_TAG_SLASH;
1009 else if (is_name_start_char (g_utf8_get_char (context->iter)))
1011 context->state = STATE_INSIDE_OPEN_TAG_NAME;
1013 /* start of tag name */
1014 context->start = context->iter;
1021 G_MARKUP_ERROR_PARSE,
1022 _("'%s' is not a valid character following "
1023 "a '<' character; it may not begin an "
1025 utf8_str (context->iter, buf));
1029 /* The AFTER_CLOSE_ANGLE state is actually sort of
1030 * broken, because it doesn't correspond to a range
1031 * of characters in the input stream as the others do,
1032 * and thus makes things harder to conceptualize
1034 case STATE_AFTER_CLOSE_ANGLE:
1035 /* Possible next states: INSIDE_TEXT, STATE_START */
1036 if (context->tag_stack == NULL)
1038 context->start = NULL;
1039 context->state = STATE_START;
1043 context->start = context->iter;
1044 context->state = STATE_INSIDE_TEXT;
1048 case STATE_AFTER_ELISION_SLASH:
1049 /* Possible next state: AFTER_CLOSE_ANGLE */
1052 /* We need to pop the tag stack and call the end_element
1053 * function, since this is the close tag
1055 GError *tmp_error = NULL;
1057 g_assert (context->tag_stack != NULL);
1060 if (context->parser->end_element)
1061 (* context->parser->end_element) (context,
1062 context->tag_stack->data,
1068 mark_error (context, tmp_error);
1069 g_propagate_error (error, tmp_error);
1073 if (*context->iter == '>')
1075 /* move after the close angle */
1076 advance_char (context);
1077 context->state = STATE_AFTER_CLOSE_ANGLE;
1084 G_MARKUP_ERROR_PARSE,
1085 _("Odd character '%s', expected a '>' character "
1086 "to end the start tag of element '%s'"),
1087 utf8_str (context->iter, buf),
1088 current_element (context));
1092 g_free (context->tag_stack->data);
1093 context->tag_stack = g_slist_delete_link (context->tag_stack,
1094 context->tag_stack);
1098 case STATE_INSIDE_OPEN_TAG_NAME:
1099 /* Possible next states: BETWEEN_ATTRIBUTES */
1101 /* if there's a partial chunk then it's the first part of the
1102 * tag name. If there's a context->start then it's the start
1103 * of the tag name in current_text, the partial chunk goes
1104 * before that start though.
1106 advance_to_name_end (context);
1108 if (context->iter == context->current_text_end)
1110 /* The name hasn't necessarily ended. Merge with
1111 * partial chunk, leave state unchanged.
1113 add_to_partial (context, context->start, context->iter);
1117 /* The name has ended. Combine it with the partial chunk
1118 * if any; push it on the stack; enter next state.
1120 add_to_partial (context, context->start, context->iter);
1121 context->tag_stack =
1122 g_slist_prepend (context->tag_stack,
1123 g_string_free (context->partial_chunk,
1126 context->partial_chunk = NULL;
1128 context->state = STATE_BETWEEN_ATTRIBUTES;
1129 context->start = NULL;
1133 case STATE_INSIDE_ATTRIBUTE_NAME:
1134 /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1136 /* read the full name, if we enter the equals sign state
1137 * then add the attribute to the list (without the value),
1138 * otherwise store a partial chunk to be prepended later.
1140 advance_to_name_end (context);
1142 if (context->iter == context->current_text_end)
1144 /* The name hasn't necessarily ended. Merge with
1145 * partial chunk, leave state unchanged.
1147 add_to_partial (context, context->start, context->iter);
1151 /* The name has ended. Combine it with the partial chunk
1152 * if any; push it on the stack; enter next state.
1154 add_to_partial (context, context->start, context->iter);
1156 add_attribute (context, g_string_free (context->partial_chunk, FALSE));
1158 context->partial_chunk = NULL;
1159 context->start = NULL;
1161 if (*context->iter == '=')
1163 advance_char (context);
1164 context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1171 G_MARKUP_ERROR_PARSE,
1172 _("Odd character '%s', expected a '=' after "
1173 "attribute name '%s' of element '%s'"),
1174 utf8_str (context->iter, buf),
1175 current_attribute (context),
1176 current_element (context));
1182 case STATE_BETWEEN_ATTRIBUTES:
1183 /* Possible next states: AFTER_CLOSE_ANGLE,
1184 * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1186 skip_spaces (context);
1188 if (context->iter != context->current_text_end)
1190 if (*context->iter == '/')
1192 advance_char (context);
1193 context->state = STATE_AFTER_ELISION_SLASH;
1195 else if (*context->iter == '>')
1198 advance_char (context);
1199 context->state = STATE_AFTER_CLOSE_ANGLE;
1201 else if (is_name_start_char (g_utf8_get_char (context->iter)))
1203 context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1204 /* start of attribute name */
1205 context->start = context->iter;
1212 G_MARKUP_ERROR_PARSE,
1213 _("Odd character '%s', expected a '>' or '/' "
1214 "character to end the start tag of "
1215 "element '%s', or optionally an attribute; "
1216 "perhaps you used an invalid character in "
1217 "an attribute name"),
1218 utf8_str (context->iter, buf),
1219 current_element (context));
1222 /* If we're done with attributes, invoke
1223 * the start_element callback
1225 if (context->state == STATE_AFTER_ELISION_SLASH ||
1226 context->state == STATE_AFTER_CLOSE_ANGLE)
1228 const gchar *start_name;
1229 /* Ugly, but the current code expects an empty array instead of NULL */
1230 const gchar *empty = NULL;
1231 const gchar **attr_names = ∅
1232 const gchar **attr_values = ∅
1235 /* Call user callback for element start */
1236 start_name = current_element (context);
1238 if (context->cur_attr >= 0)
1240 attr_names = (const gchar**)context->attr_names;
1241 attr_values = (const gchar**)context->attr_values;
1245 if (context->parser->start_element)
1246 (* context->parser->start_element) (context,
1248 (const gchar **)attr_names,
1249 (const gchar **)attr_values,
1253 /* Go ahead and free the attributes. */
1254 for (; context->cur_attr >= 0; context->cur_attr--)
1256 int pos = context->cur_attr;
1257 g_free (context->attr_names[pos]);
1258 g_free (context->attr_values[pos]);
1259 context->attr_names[pos] = context->attr_values[pos] = NULL;
1261 g_assert (context->cur_attr == -1);
1262 g_assert (context->attr_names == NULL ||
1263 context->attr_names[0] == NULL);
1264 g_assert (context->attr_values == NULL ||
1265 context->attr_values[0] == NULL);
1267 if (tmp_error != NULL)
1269 mark_error (context, tmp_error);
1270 g_propagate_error (error, tmp_error);
1276 case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1277 /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
1278 if (*context->iter == '"')
1280 advance_char (context);
1281 context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
1282 context->start = context->iter;
1284 else if (*context->iter == '\'')
1286 advance_char (context);
1287 context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
1288 context->start = context->iter;
1295 G_MARKUP_ERROR_PARSE,
1296 _("Odd character '%s', expected an open quote mark "
1297 "after the equals sign when giving value for "
1298 "attribute '%s' of element '%s'"),
1299 utf8_str (context->iter, buf),
1300 current_attribute (context),
1301 current_element (context));
1305 case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1306 case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1307 /* Possible next states: BETWEEN_ATTRIBUTES */
1311 if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
1322 if (*context->iter == delim)
1325 while (advance_char (context));
1327 if (context->iter == context->current_text_end)
1329 /* The value hasn't necessarily ended. Merge with
1330 * partial chunk, leave state unchanged.
1332 add_to_partial (context, context->start, context->iter);
1336 /* The value has ended at the quote mark. Combine it
1337 * with the partial chunk if any; set it for the current
1340 add_to_partial (context, context->start, context->iter);
1342 g_assert (context->cur_attr >= 0);
1344 if (unescape_text (context,
1345 context->partial_chunk->str,
1346 context->partial_chunk->str +
1347 context->partial_chunk->len,
1348 &context->attr_values[context->cur_attr],
1351 /* success, advance past quote and set state. */
1352 advance_char (context);
1353 context->state = STATE_BETWEEN_ATTRIBUTES;
1354 context->start = NULL;
1357 truncate_partial (context);
1361 case STATE_INSIDE_TEXT:
1362 /* Possible next states: AFTER_OPEN_ANGLE */
1365 if (*context->iter == '<')
1368 while (advance_char (context));
1370 /* The text hasn't necessarily ended. Merge with
1371 * partial chunk, leave state unchanged.
1374 add_to_partial (context, context->start, context->iter);
1376 if (context->iter != context->current_text_end)
1378 gchar *unescaped = NULL;
1380 /* The text has ended at the open angle. Call the text
1384 if (unescape_text (context,
1385 context->partial_chunk->str,
1386 context->partial_chunk->str +
1387 context->partial_chunk->len,
1391 GError *tmp_error = NULL;
1393 if (context->parser->text)
1394 (*context->parser->text) (context,
1402 if (tmp_error == NULL)
1404 /* advance past open angle and set state. */
1405 advance_char (context);
1406 context->state = STATE_AFTER_OPEN_ANGLE;
1407 /* could begin a passthrough */
1408 context->start = context->iter;
1412 mark_error (context, tmp_error);
1413 g_propagate_error (error, tmp_error);
1417 truncate_partial (context);
1421 case STATE_AFTER_CLOSE_TAG_SLASH:
1422 /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1423 if (is_name_start_char (g_utf8_get_char (context->iter)))
1425 context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1427 /* start of tag name */
1428 context->start = context->iter;
1435 G_MARKUP_ERROR_PARSE,
1436 _("'%s' is not a valid character following "
1437 "the characters '</'; '%s' may not begin an "
1439 utf8_str (context->iter, buf),
1440 utf8_str (context->iter, buf));
1444 case STATE_INSIDE_CLOSE_TAG_NAME:
1445 /* Possible next state: AFTER_CLOSE_ANGLE */
1446 advance_to_name_end (context);
1448 if (context->iter == context->current_text_end)
1450 /* The name hasn't necessarily ended. Merge with
1451 * partial chunk, leave state unchanged.
1453 add_to_partial (context, context->start, context->iter);
1457 /* The name has ended. Combine it with the partial chunk
1458 * if any; check that it matches stack top and pop
1459 * stack; invoke proper callback; enter next state.
1463 add_to_partial (context, context->start, context->iter);
1465 close_name = g_string_free (context->partial_chunk, FALSE);
1466 context->partial_chunk = NULL;
1468 if (*context->iter != '>')
1473 G_MARKUP_ERROR_PARSE,
1474 _("'%s' is not a valid character following "
1475 "the close element name '%s'; the allowed "
1476 "character is '>'"),
1477 utf8_str (context->iter, buf),
1480 else if (context->tag_stack == NULL)
1484 G_MARKUP_ERROR_PARSE,
1485 _("Element '%s' was closed, no element "
1486 "is currently open"),
1489 else if (strcmp (close_name, current_element (context)) != 0)
1493 G_MARKUP_ERROR_PARSE,
1494 _("Element '%s' was closed, but the currently "
1495 "open element is '%s'"),
1497 current_element (context));
1502 advance_char (context);
1503 context->state = STATE_AFTER_CLOSE_ANGLE;
1504 context->start = NULL;
1506 /* call the end_element callback */
1508 if (context->parser->end_element)
1509 (* context->parser->end_element) (context,
1515 /* Pop the tag stack */
1516 g_free (context->tag_stack->data);
1517 context->tag_stack = g_slist_delete_link (context->tag_stack,
1518 context->tag_stack);
1522 mark_error (context, tmp_error);
1523 g_propagate_error (error, tmp_error);
1527 g_free (close_name);
1531 case STATE_INSIDE_PASSTHROUGH:
1532 /* Possible next state: AFTER_CLOSE_ANGLE */
1535 if (*context->iter == '<')
1537 if (*context->iter == '>')
1540 add_to_partial (context, context->start, context->iter);
1541 context->start = context->iter;
1542 if ((g_str_has_prefix (context->partial_chunk->str, "<?")
1543 && g_str_has_suffix (context->partial_chunk->str, "?")) ||
1544 (g_str_has_prefix (context->partial_chunk->str, "<!--")
1545 && g_str_has_suffix (context->partial_chunk->str, "--")) ||
1546 (g_str_has_prefix (context->partial_chunk->str, "<![CDATA[")
1547 && g_str_has_suffix (context->partial_chunk->str, "]]")) ||
1548 (g_str_has_prefix (context->partial_chunk->str, "<!DOCTYPE")
1549 && context->balance == 0))
1553 while (advance_char (context));
1555 if (context->iter == context->current_text_end)
1557 /* The passthrough hasn't necessarily ended. Merge with
1558 * partial chunk, leave state unchanged.
1560 add_to_partial (context, context->start, context->iter);
1564 /* The passthrough has ended at the close angle. Combine
1565 * it with the partial chunk if any. Call the passthrough
1566 * callback. Note that the open/close angles are
1567 * included in the text of the passthrough.
1569 GError *tmp_error = NULL;
1571 advance_char (context); /* advance past close angle */
1572 add_to_partial (context, context->start, context->iter);
1574 if (context->parser->passthrough)
1575 (*context->parser->passthrough) (context,
1576 context->partial_chunk->str,
1577 context->partial_chunk->len,
1581 truncate_partial (context);
1583 if (tmp_error == NULL)
1585 context->state = STATE_AFTER_CLOSE_ANGLE;
1586 context->start = context->iter; /* could begin text */
1590 mark_error (context, tmp_error);
1591 g_propagate_error (error, tmp_error);
1601 g_assert_not_reached ();
1607 context->parsing = FALSE;
1609 return context->state != STATE_ERROR;
1613 * g_markup_parse_context_end_parse:
1614 * @context: a #GMarkupParseContext
1615 * @error: return location for a #GError
1617 * Signals to the #GMarkupParseContext that all data has been
1618 * fed into the parse context with g_markup_parse_context_parse().
1619 * This function reports an error if the document isn't complete,
1620 * for example if elements are still open.
1622 * Return value: %TRUE on success, %FALSE if an error was set
1625 g_markup_parse_context_end_parse (GMarkupParseContext *context,
1628 g_return_val_if_fail (context != NULL, FALSE);
1629 g_return_val_if_fail (!context->parsing, FALSE);
1630 g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1632 if (context->partial_chunk != NULL)
1634 g_string_free (context->partial_chunk, TRUE);
1635 context->partial_chunk = NULL;
1638 if (context->document_empty)
1640 set_error (context, error, G_MARKUP_ERROR_EMPTY,
1641 _("Document was empty or contained only whitespace"));
1645 context->parsing = TRUE;
1647 switch (context->state)
1653 case STATE_AFTER_OPEN_ANGLE:
1654 set_error (context, error, G_MARKUP_ERROR_PARSE,
1655 _("Document ended unexpectedly just after an open angle bracket '<'"));
1658 case STATE_AFTER_CLOSE_ANGLE:
1659 if (context->tag_stack != NULL)
1661 /* Error message the same as for INSIDE_TEXT */
1662 set_error (context, error, G_MARKUP_ERROR_PARSE,
1663 _("Document ended unexpectedly with elements still open - "
1664 "'%s' was the last element opened"),
1665 current_element (context));
1669 case STATE_AFTER_ELISION_SLASH:
1670 set_error (context, error, G_MARKUP_ERROR_PARSE,
1671 _("Document ended unexpectedly, expected to see a close angle "
1672 "bracket ending the tag <%s/>"), current_element (context));
1675 case STATE_INSIDE_OPEN_TAG_NAME:
1676 set_error (context, error, G_MARKUP_ERROR_PARSE,
1677 _("Document ended unexpectedly inside an element name"));
1680 case STATE_INSIDE_ATTRIBUTE_NAME:
1681 set_error (context, error, G_MARKUP_ERROR_PARSE,
1682 _("Document ended unexpectedly inside an attribute name"));
1685 case STATE_BETWEEN_ATTRIBUTES:
1686 set_error (context, error, G_MARKUP_ERROR_PARSE,
1687 _("Document ended unexpectedly inside an element-opening "
1691 case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1692 set_error (context, error, G_MARKUP_ERROR_PARSE,
1693 _("Document ended unexpectedly after the equals sign "
1694 "following an attribute name; no attribute value"));
1697 case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1698 case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1699 set_error (context, error, G_MARKUP_ERROR_PARSE,
1700 _("Document ended unexpectedly while inside an attribute "
1704 case STATE_INSIDE_TEXT:
1705 g_assert (context->tag_stack != NULL);
1706 set_error (context, error, G_MARKUP_ERROR_PARSE,
1707 _("Document ended unexpectedly with elements still open - "
1708 "'%s' was the last element opened"),
1709 current_element (context));
1712 case STATE_AFTER_CLOSE_TAG_SLASH:
1713 case STATE_INSIDE_CLOSE_TAG_NAME:
1714 set_error (context, error, G_MARKUP_ERROR_PARSE,
1715 _("Document ended unexpectedly inside the close tag for "
1716 "element '%s'"), current_element);
1719 case STATE_INSIDE_PASSTHROUGH:
1720 set_error (context, error, G_MARKUP_ERROR_PARSE,
1721 _("Document ended unexpectedly inside a comment or "
1722 "processing instruction"));
1727 g_assert_not_reached ();
1731 context->parsing = FALSE;
1733 return context->state != STATE_ERROR;
1737 * g_markup_parse_context_get_element:
1738 * @context: a #GMarkupParseContext
1739 * @returns: the name of the currently open element, or %NULL
1741 * Retrieves the name of the currently open element.
1745 G_CONST_RETURN gchar *
1746 g_markup_parse_context_get_element (GMarkupParseContext *context)
1748 g_return_val_if_fail (context != NULL, NULL);
1750 if (context->tag_stack == NULL)
1753 return current_element (context);
1757 * g_markup_parse_context_get_position:
1758 * @context: a #GMarkupParseContext
1759 * @line_number: return location for a line number, or %NULL
1760 * @char_number: return location for a char-on-line number, or %NULL
1762 * Retrieves the current line number and the number of the character on
1763 * that line. Intended for use in error messages; there are no strict
1764 * semantics for what constitutes the "current" line number other than
1765 * "the best number we could come up with for error messages."
1769 g_markup_parse_context_get_position (GMarkupParseContext *context,
1773 g_return_if_fail (context != NULL);
1776 *line_number = context->line_number;
1779 *char_number = context->char_number;
1783 append_escaped_text (GString *str,
1791 end = text + length;
1796 next = g_utf8_next_char (p);
1801 g_string_append (str, "&");
1805 g_string_append (str, "<");
1809 g_string_append (str, ">");
1813 g_string_append (str, "'");
1817 g_string_append (str, """);
1821 g_string_append_len (str, p, next - p);
1830 * g_markup_escape_text:
1831 * @text: some valid UTF-8 text
1832 * @length: length of @text in bytes
1834 * Escapes text so that the markup parser will parse it verbatim.
1835 * Less than, greater than, ampersand, etc. are replaced with the
1836 * corresponding entities. This function would typically be used
1837 * when writing out a file to be parsed with the markup parser.
1839 * Note that this function doesn't protect whitespace and line endings
1840 * from being processed according to the XML rules for normalization
1841 * of line endings and attribute values.
1843 * Return value: escaped text
1846 g_markup_escape_text (const gchar *text,
1851 g_return_val_if_fail (text != NULL, NULL);
1854 length = strlen (text);
1856 str = g_string_new (NULL);
1857 append_escaped_text (str, text, length);
1859 return g_string_free (str, FALSE);
/* Skips a positional-argument specifier of the form "<digits>$".
 * Returns the position just past the '$' when such a specifier starts
 * at @cp; otherwise returns @cp unchanged.
 */
static const char *
skip_positional_argument (const char *cp)
{
  const char *np = cp;

  while (*np >= '0' && *np <= '9')
    np++;

  /* Digits not followed by '$' are a width or precision, not a position. */
  return (np != cp && *np == '$') ? np + 1 : cp;
}

/**
 * find_conversion:
 * @format: a printf-style format string
 * @after: location to store a pointer to the character after
 *   the returned conversion. On a %NULL return, returns the
 *   pointer to the trailing NUL in the string
 *
 * Find the next conversion in a printf-style format string.
 * Partially based on code from printf-parser.c,
 * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
 *
 * Return value: pointer to the next conversion in @format,
 *  or %NULL, if none.
 **/
static const char *
find_conversion (const char  *format,
                 const char **after)
{
  const char *start = format;
  const char *cp;

  while (*start != '\0' && *start != '%')
    start++;

  if (*start == '\0')
    {
      *after = start;
      return NULL;
    }

  cp = start + 1;

  /* A lone '%' at the very end of the string is not a conversion. */
  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Test for positional argument.  */
  cp = skip_positional_argument (cp);

  /* Skip the flags.  */
  while (*cp == '\'' ||
         *cp == '-' ||
         *cp == '+' ||
         *cp == ' ' ||
         *cp == '#' ||
         *cp == '0')
    cp++;

  /* Skip the field width.  */
  if (*cp == '*')
    {
      cp++;

      /* Test for positional argument.  */
      cp = skip_positional_argument (cp);
    }
  else
    {
      for (; *cp >= '0' && *cp <= '9'; cp++)
        ;
    }

  /* Skip the precision.  */
  if (*cp == '.')
    {
      cp++;
      if (*cp == '*')
        {
          /* Step over the '*' itself; otherwise it would be taken for
           * the conversion character and e.g. "%.*s" would end early.
           */
          cp++;

          /* Test for positional argument.  */
          cp = skip_positional_argument (cp);
        }
      else
        {
          for (; *cp >= '0' && *cp <= '9'; cp++)
            ;
        }
    }

  /* Skip argument type/size specifiers.  */
  while (*cp == 'h' ||
         *cp == 'L' ||
         *cp == 'l' ||
         *cp == 'j' ||
         *cp == 'z' ||
         *cp == 'Z' ||
         *cp == 't')
    cp++;

  /* Skip the conversion character, unless the format string was cut
   * short; never move past the terminating NUL.
   */
  if (*cp != '\0')
    cp++;

  *after = cp;
  return start;
}
1989 * g_markup_vprintf_escaped:
1990 * @format: printf() style format string
1991 * @args: variable argument list, similar to vprintf()
1993 * Formats the data in @args according to @format, escaping
1994 * all string and character arguments in the fashion
1995 * of g_markup_escape_text(). See g_markup_printf_escaped().
1997 * Return value: newly allocated result from formatting
1998 * operation. Free with g_free().
2003 g_markup_vprintf_escaped (const char *format,
2008 GString *result = NULL;
2009 gchar *output1 = NULL;
2010 gchar *output2 = NULL;
2011 const char *p, *op1, *op2;
2014 /* The technique here, is that we make two format strings that
2015 * have the identical conversions in the identical order to the
2016 * original strings, but differ in the text in-between. We
2017 * then use the normal g_strdup_vprintf() to format the arguments
2018 * with the two new format strings. By comparing the results,
2019 * we can figure out what segments of the output come from
2020 * the the original format string, and what from the arguments,
2021 * and thus know what portions of the string to escape.
2023 * For instance, for:
2025 * g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5);
2027 * We form the two format strings "%sX%dX" and %sY%sY". The results
2028 * of formatting with those two strings are
2030 * "%sX%dX" => "Susan & FredX5X"
2031 * "%sY%dY" => "Susan & FredY5Y"
2033 * To find the span of the first argument, we find the first position
2034 * where the two arguments differ, which tells us that the first
2035 * argument formatted to "Susan & Fred". We then escape that
2036 * to "Susan & Fred" and join up with the intermediate portions
2037 * of the format string and the second argument to get
2038 * "Susan & Fred ate 5 apples".
2041 /* Create the two modified format strings
2043 format1 = g_string_new (NULL);
2044 format2 = g_string_new (NULL);
2049 const char *conv = find_conversion (p, &after);
2053 g_string_append_len (format1, conv, after - conv);
2054 g_string_append_c (format1, 'X');
2055 g_string_append_len (format2, conv, after - conv);
2056 g_string_append_c (format2, 'Y');
2061 /* Use them to format the arguments
2063 G_VA_COPY (args2, args);
2065 output1 = g_strdup_vprintf (format1->str, args);
2070 output2 = g_strdup_vprintf (format2->str, args2);
2075 result = g_string_new (NULL);
2077 /* Iterate through the original format string again,
2078 * copying the non-conversion portions and the escaped
2079 * converted arguments to the output string.
2087 const char *output_start;
2088 const char *conv = find_conversion (p, &after);
2091 if (!conv) /* The end, after points to the trailing \0 */
2093 g_string_append_len (result, p, after - p);
2097 g_string_append_len (result, p, conv - p);
2099 while (*op1 == *op2)
2105 escaped = g_markup_escape_text (output_start, op1 - output_start);
2106 g_string_append (result, escaped);
2115 g_string_free (format1, TRUE);
2116 g_string_free (format2, TRUE);
2121 return g_string_free (result, FALSE);
2127 * g_markup_printf_escaped:
2128 * @format: printf() style format string
2129 * @Varargs: the arguments to insert in the format string
2131 * Formats arguments according to @format, escaping
2132 * all string and character arguments in the fashion
2133 * of g_markup_escape_text(). This is useful when you
2134 * want to insert literal strings into XML-style markup
2135 * output, without having to worry that the strings
2136 * might themselves contain markup.
2138 * <informalexample><programlisting>
2139 * const char *store = "Fortnum & Mason";
2140 * const char *item = "Tea";
2143 * output = g_markup_printf_escaped ("<purchase>"
2144 * "<store>%s</store>"
2145 * "<item>%s</item>"
2146 * "</purchase>",
2148 * </programlisting></informalexample>
2150 * Return value: newly allocated result from formatting
2151 * operation. Free with g_free().
2156 g_markup_printf_escaped (const char *format, ...)
2161 va_start (args, format);
2162 result = g_markup_vprintf_escaped (format, args);