1 /* gmarkup.c - Simple XML-like parser
3 * Copyright 2000, 2003 Red Hat, Inc.
5 * GLib is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU Lesser General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
10 * GLib is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with GLib; see the file COPYING.LIB. If not,
17 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
34 g_markup_error_quark (void)
36 static GQuark error_quark = 0;
39 error_quark = g_quark_from_static_string ("g-markup-error-quark");
47 STATE_AFTER_OPEN_ANGLE,
48 STATE_AFTER_CLOSE_ANGLE,
49 STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
50 STATE_INSIDE_OPEN_TAG_NAME,
51 STATE_INSIDE_ATTRIBUTE_NAME,
52 STATE_BETWEEN_ATTRIBUTES,
53 STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
54 STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
55 STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
57 STATE_AFTER_CLOSE_TAG_SLASH,
58 STATE_INSIDE_CLOSE_TAG_NAME,
59 STATE_INSIDE_PASSTHROUGH,
63 struct _GMarkupParseContext
65 const GMarkupParser *parser;
67 GMarkupParseFlags flags;
73 GDestroyNotify dnotify;
75 /* A piece of character data or an element that
76 * hasn't "ended" yet so we haven't yet called
77 * the callback for it.
79 GString *partial_chunk;
81 GMarkupParseState state;
88 const gchar *current_text;
89 gssize current_text_len;
90 const gchar *current_text_end;
92 GString *leftover_char_portion;
94 /* used to save the start of the last interesting thingy */
99 guint document_empty : 1;
105 * g_markup_parse_context_new:
106 * @parser: a #GMarkupParser
107 * @flags: one or more #GMarkupParseFlags
108 * @user_data: user data to pass to #GMarkupParser functions
109 * @user_data_dnotify: user data destroy notifier called when the parse context is freed
111 * Creates a new parse context. A parse context is used to parse
112 * marked-up documents. You can feed any number of documents into
113 * a context, as long as no errors occur; once an error occurs,
114 * the parse context can't continue to parse text (you have to free it
115 * and create a new parse context).
117 * Return value: a new #GMarkupParseContext
119 GMarkupParseContext *
120 g_markup_parse_context_new (const GMarkupParser *parser,
121 GMarkupParseFlags flags,
123 GDestroyNotify user_data_dnotify)
125 GMarkupParseContext *context;
127 g_return_val_if_fail (parser != NULL, NULL);
129 context = g_new (GMarkupParseContext, 1);
131 context->parser = parser;
132 context->flags = flags;
133 context->user_data = user_data;
134 context->dnotify = user_data_dnotify;
136 context->line_number = 1;
137 context->char_number = 1;
139 context->partial_chunk = NULL;
141 context->state = STATE_START;
142 context->tag_stack = NULL;
143 context->attr_names = NULL;
144 context->attr_values = NULL;
145 context->cur_attr = -1;
146 context->alloc_attrs = 0;
148 context->current_text = NULL;
149 context->current_text_len = -1;
150 context->current_text_end = NULL;
151 context->leftover_char_portion = NULL;
153 context->start = NULL;
154 context->iter = NULL;
156 context->document_empty = TRUE;
157 context->parsing = FALSE;
159 context->balance = 0;
165 * g_markup_parse_context_free:
166 * @context: a #GMarkupParseContext
168 * Frees a #GMarkupParseContext. Can't be called from inside
169 * one of the #GMarkupParser functions.
173 g_markup_parse_context_free (GMarkupParseContext *context)
175 g_return_if_fail (context != NULL);
176 g_return_if_fail (!context->parsing);
178 if (context->dnotify)
179 (* context->dnotify) (context->user_data);
181 g_strfreev (context->attr_names);
182 g_strfreev (context->attr_values);
184 g_slist_foreach (context->tag_stack, (GFunc)g_free, NULL);
185 g_slist_free (context->tag_stack);
187 if (context->partial_chunk)
188 g_string_free (context->partial_chunk, TRUE);
190 if (context->leftover_char_portion)
191 g_string_free (context->leftover_char_portion, TRUE);
197 mark_error (GMarkupParseContext *context,
200 context->state = STATE_ERROR;
202 if (context->parser->error)
203 (*context->parser->error) (context, error, context->user_data);
207 set_error (GMarkupParseContext *context,
217 va_start (args, format);
218 s = g_strdup_vprintf (format, args);
221 tmp_error = g_error_new (G_MARKUP_ERROR,
223 _("Error on line %d char %d: %s"),
224 context->line_number,
225 context->char_number,
230 mark_error (context, tmp_error);
232 g_propagate_error (error, tmp_error);
236 is_name_start_char (gunichar c)
238 if (g_unichar_isalpha (c) ||
247 is_name_char (gunichar c)
249 if (g_unichar_isalnum (c) ||
261 char_str (gunichar c,
265 g_unichar_to_utf8 (c, buf);
270 utf8_str (const gchar *utf8,
273 char_str (g_utf8_get_char (utf8), buf);
278 set_unescape_error (GMarkupParseContext *context,
280 const gchar *remaining_text,
281 const gchar *remaining_text_end,
289 gint remaining_newlines;
292 remaining_newlines = 0;
294 while (p != remaining_text_end)
297 ++remaining_newlines;
301 va_start (args, format);
302 s = g_strdup_vprintf (format, args);
305 tmp_error = g_error_new (G_MARKUP_ERROR,
307 _("Error on line %d: %s"),
308 context->line_number - remaining_newlines,
313 mark_error (context, tmp_error);
315 g_propagate_error (error, tmp_error);
321 USTATE_AFTER_AMPERSAND,
322 USTATE_INSIDE_ENTITY_NAME,
323 USTATE_AFTER_CHARREF_HASH
327 unescape_text (GMarkupParseContext *context,
329 const gchar *text_end,
333 #define MAX_ENT_LEN 5
339 str = g_string_new (NULL);
341 state = USTATE_INSIDE_TEXT;
344 while (p != text_end && context->state != STATE_ERROR)
346 g_assert (p < text_end);
350 case USTATE_INSIDE_TEXT:
352 while (p != text_end && *p != '&')
353 p = g_utf8_next_char (p);
357 g_string_append_len (str, start, p - start);
362 if (p != text_end && *p == '&')
364 p = g_utf8_next_char (p);
365 state = USTATE_AFTER_AMPERSAND;
370 case USTATE_AFTER_AMPERSAND:
374 p = g_utf8_next_char (p);
377 state = USTATE_AFTER_CHARREF_HASH;
379 else if (!is_name_start_char (g_utf8_get_char (p)))
383 set_unescape_error (context, error,
385 G_MARKUP_ERROR_PARSE,
386 _("Empty entity '&;' seen; valid "
387 "entities are: & " < > '"));
393 set_unescape_error (context, error,
395 G_MARKUP_ERROR_PARSE,
396 _("Character '%s' is not valid at "
397 "the start of an entity name; "
398 "the & character begins an entity; "
399 "if this ampersand isn't supposed "
400 "to be an entity, escape it as "
408 state = USTATE_INSIDE_ENTITY_NAME;
414 case USTATE_INSIDE_ENTITY_NAME:
416 gchar buf[MAX_ENT_LEN+1] = {
417 '\0', '\0', '\0', '\0', '\0', '\0'
421 while (p != text_end)
425 else if (!is_name_char (*p))
429 set_unescape_error (context, error,
431 G_MARKUP_ERROR_PARSE,
432 _("Character '%s' is not valid "
433 "inside an entity name"),
438 p = g_utf8_next_char (p);
441 if (context->state != STATE_ERROR)
456 /* move to after semicolon */
457 p = g_utf8_next_char (p);
459 state = USTATE_INSIDE_TEXT;
461 if (strcmp (buf, "lt") == 0)
462 g_string_append_c (str, '<');
463 else if (strcmp (buf, "gt") == 0)
464 g_string_append_c (str, '>');
465 else if (strcmp (buf, "amp") == 0)
466 g_string_append_c (str, '&');
467 else if (strcmp (buf, "quot") == 0)
468 g_string_append_c (str, '"');
469 else if (strcmp (buf, "apos") == 0)
470 g_string_append_c (str, '\'');
473 set_unescape_error (context, error,
475 G_MARKUP_ERROR_PARSE,
476 _("Entity name '%s' is not known"),
482 set_unescape_error (context, error,
483 /* give line number of the & */
485 G_MARKUP_ERROR_PARSE,
486 _("Entity did not end with a semicolon; "
487 "most likely you used an ampersand "
488 "character without intending to start "
489 "an entity - escape ampersand as &"));
495 case USTATE_AFTER_CHARREF_HASH:
497 gboolean is_hex = FALSE;
501 p = g_utf8_next_char (p);
505 while (p != text_end && *p != ';')
506 p = g_utf8_next_char (p);
510 g_assert (*p == ';');
512 /* digit is between start and p */
516 gchar *digit = g_strndup (start, p - start);
519 gchar *digit_end = digit + (p - start);
523 l = strtoul (digit, &end, 16);
525 l = strtoul (digit, &end, 10);
527 if (end != digit_end || errno != 0)
529 set_unescape_error (context, error,
531 G_MARKUP_ERROR_PARSE,
532 _("Failed to parse '%s', which "
533 "should have been a digit "
534 "inside a character reference "
535 "(ê for example) - perhaps "
536 "the digit is too large"),
541 /* characters XML permits */
545 (l >= 0x20 && l <= 0xD7FF) ||
546 (l >= 0xE000 && l <= 0xFFFD) ||
547 (l >= 0x10000 && l <= 0x10FFFF))
550 g_string_append (str, char_str (l, buf));
554 set_unescape_error (context, error,
556 G_MARKUP_ERROR_PARSE,
557 _("Character reference '%s' does not encode a permitted character"),
564 /* Move to next state */
565 p = g_utf8_next_char (p); /* past semicolon */
567 state = USTATE_INSIDE_TEXT;
571 set_unescape_error (context, error,
573 G_MARKUP_ERROR_PARSE,
574 _("Empty character reference; "
575 "should include a digit such as "
581 set_unescape_error (context, error,
583 G_MARKUP_ERROR_PARSE,
584 _("Character reference did not end with a "
586 "most likely you used an ampersand "
587 "character without intending to start "
588 "an entity - escape ampersand as &"));
594 g_assert_not_reached ();
599 if (context->state != STATE_ERROR)
603 case USTATE_INSIDE_TEXT:
605 case USTATE_AFTER_AMPERSAND:
606 case USTATE_INSIDE_ENTITY_NAME:
607 set_unescape_error (context, error,
609 G_MARKUP_ERROR_PARSE,
610 _("Unfinished entity reference"));
612 case USTATE_AFTER_CHARREF_HASH:
613 set_unescape_error (context, error,
615 G_MARKUP_ERROR_PARSE,
616 _("Unfinished character reference"));
621 if (context->state == STATE_ERROR)
623 g_string_free (str, TRUE);
629 *unescaped = g_string_free (str, FALSE);
637 advance_char (GMarkupParseContext *context)
640 context->iter = g_utf8_next_char (context->iter);
641 context->char_number += 1;
642 if (*context->iter == '\n')
644 context->line_number += 1;
645 context->char_number = 1;
648 return context->iter != context->current_text_end;
654 return c == ' ' || c == '\t' || c == '\n' || c == '\r';
658 skip_spaces (GMarkupParseContext *context)
662 if (!xml_isspace (*context->iter))
665 while (advance_char (context));
669 advance_to_name_end (GMarkupParseContext *context)
673 if (!is_name_char (g_utf8_get_char (context->iter)))
676 while (advance_char (context));
680 add_to_partial (GMarkupParseContext *context,
681 const gchar *text_start,
682 const gchar *text_end)
684 if (context->partial_chunk == NULL)
685 context->partial_chunk = g_string_new (NULL);
687 if (text_start != text_end)
688 g_string_append_len (context->partial_chunk, text_start,
689 text_end - text_start);
691 /* Invariant here that partial_chunk exists */
695 truncate_partial (GMarkupParseContext *context)
697 if (context->partial_chunk != NULL)
699 context->partial_chunk = g_string_truncate (context->partial_chunk, 0);
704 current_element (GMarkupParseContext *context)
706 return context->tag_stack->data;
710 current_attribute (GMarkupParseContext *context)
712 g_assert (context->cur_attr >= 0);
713 return context->attr_names[context->cur_attr];
717 find_current_text_end (GMarkupParseContext *context)
719 /* This function must be safe (non-segfaulting) on invalid UTF8 */
720 const gchar *end = context->current_text + context->current_text_len;
724 g_assert (context->current_text_len > 0);
726 p = context->current_text;
727 next = g_utf8_find_next_char (p, end);
729 while (next && *next)
734 next = g_utf8_find_next_char (p, end);
737 /* p is now the start of the last character or character portion. */
739 next = g_utf8_next_char (p); /* this only touches *p, nothing beyond */
743 /* whole character */
744 context->current_text_end = end;
749 context->leftover_char_portion = g_string_new_len (p, end - p);
750 context->current_text_len -= (end - p);
751 context->current_text_end = p;
756 add_attribute (GMarkupParseContext *context, char *name)
758 if (context->cur_attr + 2 >= context->alloc_attrs)
760 context->alloc_attrs += 5; /* silly magic number */
761 context->attr_names = g_realloc (context->attr_names, sizeof(char*)*context->alloc_attrs);
762 context->attr_values = g_realloc (context->attr_values, sizeof(char*)*context->alloc_attrs);
765 context->attr_names[context->cur_attr] = name;
766 context->attr_values[context->cur_attr] = NULL;
767 context->attr_names[context->cur_attr+1] = NULL;
768 context->attr_values[context->cur_attr+1] = NULL;
772 * g_markup_parse_context_parse:
773 * @context: a #GMarkupParseContext
774 * @text: chunk of text to parse
775 * @text_len: length of @text in bytes
776 * @error: return location for a #GError
778 * Feed some data to the #GMarkupParseContext. The data need not
779 * be valid UTF-8; an error will be signaled if it's invalid.
780 * The data need not be an entire document; you can feed a document
781 * into the parser incrementally, via multiple calls to this function.
782 * Typically, as you receive data from a network connection or file,
783 * you feed each received chunk of data into this function, aborting
784 * the process if an error occurs. Once an error is reported, no further
785 * data may be fed to the #GMarkupParseContext; all errors are fatal.
787 * Return value: %FALSE if an error occurred, %TRUE on success
790 g_markup_parse_context_parse (GMarkupParseContext *context,
795 const gchar *first_invalid;
797 g_return_val_if_fail (context != NULL, FALSE);
798 g_return_val_if_fail (text != NULL, FALSE);
799 g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
800 g_return_val_if_fail (!context->parsing, FALSE);
803 text_len = strlen (text);
808 context->parsing = TRUE;
810 if (context->leftover_char_portion)
812 const gchar *first_char;
814 if ((*text & 0xc0) != 0x80)
817 first_char = g_utf8_find_next_char (text, text + text_len);
821 /* leftover_char_portion was completed. Parse it. */
822 GString *portion = context->leftover_char_portion;
824 g_string_append_len (context->leftover_char_portion,
825 text, first_char - text);
827 /* hacks to allow recursion */
828 context->parsing = FALSE;
829 context->leftover_char_portion = NULL;
831 if (!g_markup_parse_context_parse (context,
832 portion->str, portion->len,
835 g_assert (context->state == STATE_ERROR);
838 g_string_free (portion, TRUE);
839 context->parsing = TRUE;
841 /* Skip the fraction of char that was in this text */
842 text_len -= (first_char - text);
847 /* another little chunk of the leftover char; geez
848 * someone is inefficient.
850 g_string_append_len (context->leftover_char_portion,
853 if (context->leftover_char_portion->len > 7)
855 /* The leftover char portion is too big to be
860 G_MARKUP_ERROR_BAD_UTF8,
861 _("Invalid UTF-8 encoded text"));
868 context->current_text = text;
869 context->current_text_len = text_len;
870 context->iter = context->current_text;
871 context->start = context->iter;
873 /* Nothing left after finishing the leftover char, or nothing
874 * passed in to begin with.
876 if (context->current_text_len == 0)
879 /* find_current_text_end () assumes the string starts at
880 * a character start, so we need to validate at least
881 * that much. It doesn't assume any following bytes
884 if ((*context->current_text & 0xc0) == 0x80) /* not a char start */
888 G_MARKUP_ERROR_BAD_UTF8,
889 _("Invalid UTF-8 encoded text"));
893 /* Initialize context->current_text_end, possibly adjusting
894 * current_text_len, and add any leftover char portion
896 find_current_text_end (context);
898 /* Validate UTF8 (must be done after we find the end, since
899 * we could have a trailing incomplete char)
901 if (!g_utf8_validate (context->current_text,
902 context->current_text_len,
907 p = context->current_text;
908 while (p != context->current_text_end)
915 context->line_number += newlines;
919 G_MARKUP_ERROR_BAD_UTF8,
920 _("Invalid UTF-8 encoded text"));
924 while (context->iter != context->current_text_end)
926 switch (context->state)
929 /* Possible next state: AFTER_OPEN_ANGLE */
931 g_assert (context->tag_stack == NULL);
933 /* whitespace is ignored outside of any elements */
934 skip_spaces (context);
936 if (context->iter != context->current_text_end)
938 if (*context->iter == '<')
940 /* Move after the open angle */
941 advance_char (context);
943 context->state = STATE_AFTER_OPEN_ANGLE;
945 /* this could start a passthrough */
946 context->start = context->iter;
948 /* document is now non-empty */
949 context->document_empty = FALSE;
955 G_MARKUP_ERROR_PARSE,
956 _("Document must begin with an element (e.g. <book>)"));
961 case STATE_AFTER_OPEN_ANGLE:
962 /* Possible next states: INSIDE_OPEN_TAG_NAME,
963 * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
965 if (*context->iter == '?' ||
966 *context->iter == '!')
968 /* include < in the passthrough */
969 const gchar *openangle = "<";
970 add_to_partial (context, openangle, openangle + 1);
971 context->start = context->iter;
972 context->balance = 1;
973 context->state = STATE_INSIDE_PASSTHROUGH;
975 else if (*context->iter == '/')
978 advance_char (context);
980 context->state = STATE_AFTER_CLOSE_TAG_SLASH;
982 else if (is_name_start_char (g_utf8_get_char (context->iter)))
984 context->state = STATE_INSIDE_OPEN_TAG_NAME;
986 /* start of tag name */
987 context->start = context->iter;
994 G_MARKUP_ERROR_PARSE,
995 _("'%s' is not a valid character following "
996 "a '<' character; it may not begin an "
998 utf8_str (context->iter, buf));
1002 /* The AFTER_CLOSE_ANGLE state is actually sort of
1003 * broken, because it doesn't correspond to a range
1004 * of characters in the input stream as the others do,
1005 * and thus makes things harder to conceptualize
1007 case STATE_AFTER_CLOSE_ANGLE:
1008 /* Possible next states: INSIDE_TEXT, STATE_START */
1009 if (context->tag_stack == NULL)
1011 context->start = NULL;
1012 context->state = STATE_START;
1016 context->start = context->iter;
1017 context->state = STATE_INSIDE_TEXT;
1021 case STATE_AFTER_ELISION_SLASH:
1022 /* Possible next state: AFTER_CLOSE_ANGLE */
1025 /* We need to pop the tag stack and call the end_element
1026 * function, since this is the close tag
1028 GError *tmp_error = NULL;
1030 g_assert (context->tag_stack != NULL);
1033 if (context->parser->end_element)
1034 (* context->parser->end_element) (context,
1035 context->tag_stack->data,
1041 mark_error (context, tmp_error);
1042 g_propagate_error (error, tmp_error);
1046 if (*context->iter == '>')
1048 /* move after the close angle */
1049 advance_char (context);
1050 context->state = STATE_AFTER_CLOSE_ANGLE;
1057 G_MARKUP_ERROR_PARSE,
1058 _("Odd character '%s', expected a '>' character "
1059 "to end the start tag of element '%s'"),
1060 utf8_str (context->iter, buf),
1061 current_element (context));
1065 g_free (context->tag_stack->data);
1066 context->tag_stack = g_slist_delete_link (context->tag_stack,
1067 context->tag_stack);
1071 case STATE_INSIDE_OPEN_TAG_NAME:
1072 /* Possible next states: BETWEEN_ATTRIBUTES */
1074 /* if there's a partial chunk then it's the first part of the
1075 * tag name. If there's a context->start then it's the start
1076 * of the tag name in current_text, the partial chunk goes
1077 * before that start though.
1079 advance_to_name_end (context);
1081 if (context->iter == context->current_text_end)
1083 /* The name hasn't necessarily ended. Merge with
1084 * partial chunk, leave state unchanged.
1086 add_to_partial (context, context->start, context->iter);
1090 /* The name has ended. Combine it with the partial chunk
1091 * if any; push it on the stack; enter next state.
1093 add_to_partial (context, context->start, context->iter);
1094 context->tag_stack =
1095 g_slist_prepend (context->tag_stack,
1096 g_string_free (context->partial_chunk,
1099 context->partial_chunk = NULL;
1101 context->state = STATE_BETWEEN_ATTRIBUTES;
1102 context->start = NULL;
1106 case STATE_INSIDE_ATTRIBUTE_NAME:
1107 /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1109 /* read the full name, if we enter the equals sign state
1110 * then add the attribute to the list (without the value),
1111 * otherwise store a partial chunk to be prepended later.
1113 advance_to_name_end (context);
1115 if (context->iter == context->current_text_end)
1117 /* The name hasn't necessarily ended. Merge with
1118 * partial chunk, leave state unchanged.
1120 add_to_partial (context, context->start, context->iter);
1124 /* The name has ended. Combine it with the partial chunk
1125 * if any; push it on the stack; enter next state.
1127 add_to_partial (context, context->start, context->iter);
1129 add_attribute (context, g_string_free (context->partial_chunk, FALSE));
1131 context->partial_chunk = NULL;
1132 context->start = NULL;
1134 if (*context->iter == '=')
1136 advance_char (context);
1137 context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1144 G_MARKUP_ERROR_PARSE,
1145 _("Odd character '%s', expected a '=' after "
1146 "attribute name '%s' of element '%s'"),
1147 utf8_str (context->iter, buf),
1148 current_attribute (context),
1149 current_element (context));
1155 case STATE_BETWEEN_ATTRIBUTES:
1156 /* Possible next states: AFTER_CLOSE_ANGLE,
1157 * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1159 skip_spaces (context);
1161 if (context->iter != context->current_text_end)
1163 if (*context->iter == '/')
1165 advance_char (context);
1166 context->state = STATE_AFTER_ELISION_SLASH;
1168 else if (*context->iter == '>')
1171 advance_char (context);
1172 context->state = STATE_AFTER_CLOSE_ANGLE;
1174 else if (is_name_start_char (g_utf8_get_char (context->iter)))
1176 context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1177 /* start of attribute name */
1178 context->start = context->iter;
1185 G_MARKUP_ERROR_PARSE,
1186 _("Odd character '%s', expected a '>' or '/' "
1187 "character to end the start tag of "
1188 "element '%s', or optionally an attribute; "
1189 "perhaps you used an invalid character in "
1190 "an attribute name"),
1191 utf8_str (context->iter, buf),
1192 current_element (context));
1195 /* If we're done with attributes, invoke
1196 * the start_element callback
1198 if (context->state == STATE_AFTER_ELISION_SLASH ||
1199 context->state == STATE_AFTER_CLOSE_ANGLE)
1201 const gchar *start_name;
1202 /* Ugly, but the current code expects an empty array instead of NULL */
1203 const gchar *empty = NULL;
1204 const gchar **attr_names = ∅
1205 const gchar **attr_values = ∅
1208 /* Call user callback for element start */
1209 start_name = current_element (context);
1211 if (context->cur_attr >= 0)
1213 attr_names = (const gchar**)context->attr_names;
1214 attr_values = (const gchar**)context->attr_values;
1218 if (context->parser->start_element)
1219 (* context->parser->start_element) (context,
1221 (const gchar **)attr_names,
1222 (const gchar **)attr_values,
1226 /* Go ahead and free the attributes. */
1227 for (; context->cur_attr >= 0; context->cur_attr--)
1229 int pos = context->cur_attr;
1230 g_free (context->attr_names[pos]);
1231 g_free (context->attr_values[pos]);
1232 context->attr_names[pos] = context->attr_values[pos] = NULL;
1234 g_assert (context->cur_attr == -1);
1235 g_assert (context->attr_names == NULL ||
1236 context->attr_names[0] == NULL);
1237 g_assert (context->attr_values == NULL ||
1238 context->attr_values[0] == NULL);
1240 if (tmp_error != NULL)
1242 mark_error (context, tmp_error);
1243 g_propagate_error (error, tmp_error);
1249 case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1250 /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
1251 if (*context->iter == '"')
1253 advance_char (context);
1254 context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
1255 context->start = context->iter;
1257 else if (*context->iter == '\'')
1259 advance_char (context);
1260 context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
1261 context->start = context->iter;
1268 G_MARKUP_ERROR_PARSE,
1269 _("Odd character '%s', expected an open quote mark "
1270 "after the equals sign when giving value for "
1271 "attribute '%s' of element '%s'"),
1272 utf8_str (context->iter, buf),
1273 current_attribute (context),
1274 current_element (context));
1278 case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1279 case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1280 /* Possible next states: BETWEEN_ATTRIBUTES */
1284 if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
1295 if (*context->iter == delim)
1298 while (advance_char (context));
1300 if (context->iter == context->current_text_end)
1302 /* The value hasn't necessarily ended. Merge with
1303 * partial chunk, leave state unchanged.
1305 add_to_partial (context, context->start, context->iter);
1309 /* The value has ended at the quote mark. Combine it
1310 * with the partial chunk if any; set it for the current
1313 add_to_partial (context, context->start, context->iter);
1315 g_assert (context->cur_attr >= 0);
1317 if (unescape_text (context,
1318 context->partial_chunk->str,
1319 context->partial_chunk->str +
1320 context->partial_chunk->len,
1321 &context->attr_values[context->cur_attr],
1324 /* success, advance past quote and set state. */
1325 advance_char (context);
1326 context->state = STATE_BETWEEN_ATTRIBUTES;
1327 context->start = NULL;
1330 truncate_partial (context);
1334 case STATE_INSIDE_TEXT:
1335 /* Possible next states: AFTER_OPEN_ANGLE */
1338 if (*context->iter == '<')
1341 while (advance_char (context));
1343 /* The text hasn't necessarily ended. Merge with
1344 * partial chunk, leave state unchanged.
1347 add_to_partial (context, context->start, context->iter);
1349 if (context->iter != context->current_text_end)
1351 gchar *unescaped = NULL;
1353 /* The text has ended at the open angle. Call the text
1357 if (unescape_text (context,
1358 context->partial_chunk->str,
1359 context->partial_chunk->str +
1360 context->partial_chunk->len,
1364 GError *tmp_error = NULL;
1366 if (context->parser->text)
1367 (*context->parser->text) (context,
1375 if (tmp_error == NULL)
1377 /* advance past open angle and set state. */
1378 advance_char (context);
1379 context->state = STATE_AFTER_OPEN_ANGLE;
1380 /* could begin a passthrough */
1381 context->start = context->iter;
1385 mark_error (context, tmp_error);
1386 g_propagate_error (error, tmp_error);
1390 truncate_partial (context);
1394 case STATE_AFTER_CLOSE_TAG_SLASH:
1395 /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1396 if (is_name_start_char (g_utf8_get_char (context->iter)))
1398 context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1400 /* start of tag name */
1401 context->start = context->iter;
1408 G_MARKUP_ERROR_PARSE,
1409 _("'%s' is not a valid character following "
1410 "the characters '</'; '%s' may not begin an "
1412 utf8_str (context->iter, buf),
1413 utf8_str (context->iter, buf));
1417 case STATE_INSIDE_CLOSE_TAG_NAME:
1418 /* Possible next state: AFTER_CLOSE_ANGLE */
1419 advance_to_name_end (context);
1421 if (context->iter == context->current_text_end)
1423 /* The name hasn't necessarily ended. Merge with
1424 * partial chunk, leave state unchanged.
1426 add_to_partial (context, context->start, context->iter);
1430 /* The name has ended. Combine it with the partial chunk
1431 * if any; check that it matches stack top and pop
1432 * stack; invoke proper callback; enter next state.
1436 add_to_partial (context, context->start, context->iter);
1438 close_name = g_string_free (context->partial_chunk, FALSE);
1439 context->partial_chunk = NULL;
1441 if (*context->iter != '>')
1446 G_MARKUP_ERROR_PARSE,
1447 _("'%s' is not a valid character following "
1448 "the close element name '%s'; the allowed "
1449 "character is '>'"),
1450 utf8_str (context->iter, buf),
1453 else if (context->tag_stack == NULL)
1457 G_MARKUP_ERROR_PARSE,
1458 _("Element '%s' was closed, no element "
1459 "is currently open"),
1462 else if (strcmp (close_name, current_element (context)) != 0)
1466 G_MARKUP_ERROR_PARSE,
1467 _("Element '%s' was closed, but the currently "
1468 "open element is '%s'"),
1470 current_element (context));
1475 advance_char (context);
1476 context->state = STATE_AFTER_CLOSE_ANGLE;
1477 context->start = NULL;
1479 /* call the end_element callback */
1481 if (context->parser->end_element)
1482 (* context->parser->end_element) (context,
1488 /* Pop the tag stack */
1489 g_free (context->tag_stack->data);
1490 context->tag_stack = g_slist_delete_link (context->tag_stack,
1491 context->tag_stack);
1495 mark_error (context, tmp_error);
1496 g_propagate_error (error, tmp_error);
1500 g_free (close_name);
1504 case STATE_INSIDE_PASSTHROUGH:
1505 /* Possible next state: AFTER_CLOSE_ANGLE */
1508 if (*context->iter == '<')
1510 if (*context->iter == '>')
1513 add_to_partial (context, context->start, context->iter);
1514 context->start = context->iter;
1515 if ((g_str_has_prefix (context->partial_chunk->str, "<?")
1516 && g_str_has_suffix (context->partial_chunk->str, "?")) ||
1517 (g_str_has_prefix (context->partial_chunk->str, "<!--")
1518 && g_str_has_suffix (context->partial_chunk->str, "--")) ||
1519 (g_str_has_prefix (context->partial_chunk->str, "<![CDATA[")
1520 && g_str_has_suffix (context->partial_chunk->str, "]]")) ||
1521 (g_str_has_prefix (context->partial_chunk->str, "<!DOCTYPE")
1522 && context->balance == 0))
1526 while (advance_char (context));
1528 if (context->iter == context->current_text_end)
1530 /* The passthrough hasn't necessarily ended. Merge with
1531 * partial chunk, leave state unchanged.
1533 add_to_partial (context, context->start, context->iter);
1537 /* The passthrough has ended at the close angle. Combine
1538 * it with the partial chunk if any. Call the passthrough
1539 * callback. Note that the open/close angles are
1540 * included in the text of the passthrough.
1542 GError *tmp_error = NULL;
1544 advance_char (context); /* advance past close angle */
1545 add_to_partial (context, context->start, context->iter);
1547 if (context->parser->passthrough)
1548 (*context->parser->passthrough) (context,
1549 context->partial_chunk->str,
1550 context->partial_chunk->len,
1554 truncate_partial (context);
1556 if (tmp_error == NULL)
1558 context->state = STATE_AFTER_CLOSE_ANGLE;
1559 context->start = context->iter; /* could begin text */
1563 mark_error (context, tmp_error);
1564 g_propagate_error (error, tmp_error);
1574 g_assert_not_reached ();
1580 context->parsing = FALSE;
1582 return context->state != STATE_ERROR;
1586 * g_markup_parse_context_end_parse:
1587 * @context: a #GMarkupParseContext
1588 * @error: return location for a #GError
1590 * Signals to the #GMarkupParseContext that all data has been
1591 * fed into the parse context with g_markup_parse_context_parse().
1592 * This function reports an error if the document isn't complete,
1593 * for example if elements are still open.
1595 * Return value: %TRUE on success, %FALSE if an error was set
1598 g_markup_parse_context_end_parse (GMarkupParseContext *context,
1601 g_return_val_if_fail (context != NULL, FALSE);
1602 g_return_val_if_fail (!context->parsing, FALSE);
1603 g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1605 if (context->partial_chunk != NULL)
1607 g_string_free (context->partial_chunk, TRUE);
1608 context->partial_chunk = NULL;
1611 if (context->document_empty)
1613 set_error (context, error, G_MARKUP_ERROR_EMPTY,
1614 _("Document was empty or contained only whitespace"));
1618 context->parsing = TRUE;
1620 switch (context->state)
1626 case STATE_AFTER_OPEN_ANGLE:
1627 set_error (context, error, G_MARKUP_ERROR_PARSE,
1628 _("Document ended unexpectedly just after an open angle bracket '<'"));
1631 case STATE_AFTER_CLOSE_ANGLE:
1632 if (context->tag_stack != NULL)
1634 /* Error message the same as for INSIDE_TEXT */
1635 set_error (context, error, G_MARKUP_ERROR_PARSE,
1636 _("Document ended unexpectedly with elements still open - "
1637 "'%s' was the last element opened"),
1638 current_element (context));
1642 case STATE_AFTER_ELISION_SLASH:
1643 set_error (context, error, G_MARKUP_ERROR_PARSE,
1644 _("Document ended unexpectedly, expected to see a close angle "
1645 "bracket ending the tag <%s/>"), current_element (context));
1648 case STATE_INSIDE_OPEN_TAG_NAME:
1649 set_error (context, error, G_MARKUP_ERROR_PARSE,
1650 _("Document ended unexpectedly inside an element name"));
1653 case STATE_INSIDE_ATTRIBUTE_NAME:
1654 set_error (context, error, G_MARKUP_ERROR_PARSE,
1655 _("Document ended unexpectedly inside an attribute name"));
1658 case STATE_BETWEEN_ATTRIBUTES:
1659 set_error (context, error, G_MARKUP_ERROR_PARSE,
1660 _("Document ended unexpectedly inside an element-opening "
1664 case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1665 set_error (context, error, G_MARKUP_ERROR_PARSE,
1666 _("Document ended unexpectedly after the equals sign "
1667 "following an attribute name; no attribute value"));
1670 case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1671 case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1672 set_error (context, error, G_MARKUP_ERROR_PARSE,
1673 _("Document ended unexpectedly while inside an attribute "
1677 case STATE_INSIDE_TEXT:
1678 g_assert (context->tag_stack != NULL);
1679 set_error (context, error, G_MARKUP_ERROR_PARSE,
1680 _("Document ended unexpectedly with elements still open - "
1681 "'%s' was the last element opened"),
1682 current_element (context));
1685 case STATE_AFTER_CLOSE_TAG_SLASH:
1686 case STATE_INSIDE_CLOSE_TAG_NAME:
1687 set_error (context, error, G_MARKUP_ERROR_PARSE,
1688 _("Document ended unexpectedly inside the close tag for "
1689 "element '%s'"), current_element);
1692 case STATE_INSIDE_PASSTHROUGH:
1693 set_error (context, error, G_MARKUP_ERROR_PARSE,
1694 _("Document ended unexpectedly inside a comment or "
1695 "processing instruction"));
1700 g_assert_not_reached ();
1704 context->parsing = FALSE;
1706 return context->state != STATE_ERROR;
1710 * g_markup_parse_context_get_element:
1711 * @context: a #GMarkupParseContext
1712 * @returns: the name of the currently open element, or %NULL
1714 * Retrieves the name of the currently open element.
1718 G_CONST_RETURN gchar *
1719 g_markup_parse_context_get_element (GMarkupParseContext *context)
1721 g_return_val_if_fail (context != NULL, NULL);
1723 if (context->tag_stack == NULL)
1726 return current_element (context);
1730 * g_markup_parse_context_get_position:
1731 * @context: a #GMarkupParseContext
1732 * @line_number: return location for a line number, or %NULL
1733 * @char_number: return location for a char-on-line number, or %NULL
1735 * Retrieves the current line number and the number of the character on
1736 * that line. Intended for use in error messages; there are no strict
1737 * semantics for what constitutes the "current" line number other than
1738 * "the best number we could come up with for error messages."
1742 g_markup_parse_context_get_position (GMarkupParseContext *context,
1746 g_return_if_fail (context != NULL);
1749 *line_number = context->line_number;
1752 *char_number = context->char_number;
1756 append_escaped_text (GString *str,
1764 end = text + length;
1769 next = g_utf8_next_char (p);
1774 g_string_append (str, "&");
1778 g_string_append (str, "<");
1782 g_string_append (str, ">");
1786 g_string_append (str, "'");
1790 g_string_append (str, """);
1794 g_string_append_len (str, p, next - p);
1803 * g_markup_escape_text:
1804 * @text: some valid UTF-8 text
1805 * @length: length of @text in bytes
1807 * Escapes text so that the markup parser will parse it verbatim.
1808 * Less than, greater than, ampersand, etc. are replaced with the
1809 * corresponding entities. This function would typically be used
1810 * when writing out a file to be parsed with the markup parser.
1812 * Return value: escaped text
1815 g_markup_escape_text (const gchar *text,
1820 g_return_val_if_fail (text != NULL, NULL);
1823 length = strlen (text);
1825 str = g_string_new (NULL);
1826 append_escaped_text (str, text, length);
1828 return g_string_free (str, FALSE);
/* If @cp points at a positional-argument reference of the form
 * "<digits>$", returns the position just past the '$'; otherwise
 * returns @cp unchanged.
 */
static const char *
skip_positional_argument (const char *cp)
{
  const char *np = cp;

  while (*np >= '0' && *np <= '9')
    np++;

  return (np != cp && *np == '$') ? np + 1 : cp;
}

/**
 * find_conversion:
 * @format: a printf-style format string
 * @after: location to store a pointer to the character after
 *   the returned conversion. On a %NULL return, returns the
 *   pointer to the trailing NUL in the string
 *
 * Find the next conversion in a printf-style format string.
 * Partially based on code from printf-parser.c,
 * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
 *
 * A lone '%' at the very end of @format is not a conversion. If the
 * string ends in the middle of a conversion, @after is left pointing
 * at the trailing NUL rather than beyond it.
 *
 * Return value: pointer to the next conversion in @format,
 *  or %NULL, if none.
 **/
static const char *
find_conversion (const char  *format,
                 const char **after)
{
  const char *start = format;
  const char *cp;

  while (*start != '\0' && *start != '%')
    start++;

  if (*start == '\0')
    {
      *after = start;
      return NULL;
    }

  cp = start + 1;

  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Test for positional argument.  */
  cp = skip_positional_argument (cp);

  /* Skip the flags.  */
  while (*cp == '\'' ||
         *cp == '-' ||
         *cp == '+' ||
         *cp == ' ' ||
         *cp == '#' ||
         *cp == '0')
    cp++;

  /* Skip the field width.  */
  if (*cp == '*')
    {
      cp++;

      /* Test for positional argument.  */
      cp = skip_positional_argument (cp);
    }
  else
    {
      for (; *cp >= '0' && *cp <= '9'; cp++)
        ;
    }

  /* Skip the precision.  */
  if (*cp == '.')
    {
      cp++;
      if (*cp == '*')
        {
          /* Consume the '*' itself; without this "%.*s" would be cut
           * off after the '*' and the real conversion character would
           * be mistaken for literal text.
           */
          cp++;

          /* Test for positional argument.  */
          cp = skip_positional_argument (cp);
        }
      else
        {
          for (; *cp >= '0' && *cp <= '9'; cp++)
            ;
        }
    }

  /* Skip argument type/size specifiers.  */
  while (*cp == 'h' ||
         *cp == 'L' ||
         *cp == 'l' ||
         *cp == 'j' ||
         *cp == 'z' ||
         *cp == 'Z' ||
         *cp == 't')
    cp++;

  /* Skip the conversion character, unless the string ended first.  */
  if (*cp != '\0')
    cp++;

  *after = cp;
  return start;
}
1958 * g_markup_vprintf_escaped:
1959 * @format: printf() style format string
1960 * @args: variable argument list, similar to vprintf()
1962 * Formats the data in @args according to @format, escaping
1963 * all string and character arguments in the fashion
1964 * of g_markup_escape_text(). See g_markup_printf_escaped().
1966 * Return value: newly allocated result from formatting
1967 * operation. Free with g_free().
1970 g_markup_vprintf_escaped (const char *format,
1975 GString *result = NULL;
1976 gchar *output1 = NULL;
1977 gchar *output2 = NULL;
1978 const char *p, *op1, *op2;
1981 /* The technique here, is that we make two format strings that
1982 * have the identical conversions in the identical order to the
1983 * original strings, but differ in the text in-between. We
1984 * then use the normal g_strdup_vprintf() to format the arguments
1985 * with the two new format strings. By comparing the results,
1986 * we can figure out what segments of the output come from
1987 * the the original format string, and what from the arguments,
1988 * and thus know what portions of the string to escape.
1990 * For instance, for:
1992 * g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5);
1994 * We form the two format strings "%sX%dX" and %sY%sY". The results
1995 * of formatting with those two strings are
1997 * "%sX%dX" => "Susan & FredX5X"
1998 * "%sY%dY" => "Susan & FredY5Y"
2000 * To find the span of the first argument, we find the first position
2001 * where the two arguments differ, which tells us that the first
2002 * argument formatted to "Susan & Fred". We then escape that
2003 * to "Susan & Fred" and join up with the intermediate portions
2004 * of the format string and the second argument to get
2005 * "Susan & Fred ate 5 apples".
2008 /* Create the two modified format strings
2010 format1 = g_string_new (NULL);
2011 format2 = g_string_new (NULL);
2016 const char *conv = find_conversion (p, &after);
2020 g_string_append_len (format1, conv, after - conv);
2021 g_string_append_c (format1, 'X');
2022 g_string_append_len (format2, conv, after - conv);
2023 g_string_append_c (format2, 'Y');
2028 /* Use them to format the arguments
2030 G_VA_COPY (args2, args);
2032 output1 = g_strdup_vprintf (format1->str, args);
2037 output2 = g_strdup_vprintf (format2->str, args2);
2042 result = g_string_new (NULL);
2044 /* Iterate through the original format string again,
2045 * copying the non-conversion portions and the escaped
2046 * converted arguments to the output string.
2054 const char *output_start;
2055 const char *conv = find_conversion (p, &after);
2058 if (!conv) /* The end, after points to the trailing \0 */
2060 g_string_append_len (result, p, after - p);
2064 g_string_append_len (result, p, conv - p);
2066 while (*op1 == *op2)
2072 escaped = g_markup_escape_text (output_start, op1 - output_start);
2073 g_string_append (result, escaped);
2082 g_string_free (format1, TRUE);
2083 g_string_free (format2, TRUE);
2088 return g_string_free (result, FALSE);
2094 * g_markup_printf_escaped:
2095 * @format: printf() style format string
2096 * @Varargs: the arguments to insert in the format string
2098 * Formats arguments according to @format, escaping
2099 * all string and character arguments in the fashion
2100 * of g_markup_escape_text(). This is useful when you
2101 * want to insert literal strings into XML-style markup
2102 * output, without having to worry that the strings
2103 * might themselves contain markup.
2105 * <informalexample><programlisting>
2106 * const char *store = "Fortnum & Mason";
2107 * const char *item = "Tea";
2110 * output = g_markup_printf_escaped ("<purchase>"
2111 * "<store>%s</store>"
2112 * "<item>%s</item>"
2113 * "</purchase>",
2115 * </programlisting></informalexample>
2117 * Return value: newly allocated result from formatting
2118 * operation. Free with g_free().
2121 g_markup_printf_escaped (const char *format, ...)
2126 va_start (args, format);
2127 result = g_markup_vprintf_escaped (format, args);