glib/gmarkup.c

   1 /* gmarkup.c - Simple XML-like parser
   2  *
   3  *  Copyright 2000, 2003 Red Hat, Inc.
   4  *  Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
   5  *
   6  * GLib is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU Lesser General Public License as
   8  * published by the Free Software Foundation; either version 2 of the
   9  * License, or (at your option) any later version.
  10  *
  11  * GLib is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with GLib; see the file COPYING.LIB.  If not,
  18  * see <http://www.gnu.org/licenses/>.
  19  */
  20
  21 #include "config.h"
  22
  23 #include <stdarg.h>
  24 #include <string.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27 #include <errno.h>
  28
  29 #include "gmarkup.h"
  30
  31 #include "gatomic.h"
  32 #include "gslice.h"
  33 #include "galloca.h"
  34 #include "gstrfuncs.h"
  35 #include "gstring.h"
  36 #include "gtestutils.h"
  37 #include "glibintl.h"
  38 #include "gthread.h"
  39
  40 /**
  41  * SECTION:markup
  42  * @Title: Simple XML Subset Parser
  43  * @Short_description: parses a subset of XML
  44  * @See_also: [XML Specification](http://www.w3.org/TR/REC-xml/)
  45  *
  46  * The "GMarkup" parser is intended to parse a simple markup format
  47  * that's a subset of XML. This is a small, efficient, easy-to-use
  48  * parser. It should not be used if you expect to interoperate with
  49  * other applications generating full-scale XML. However, it's very
  50  * useful for application data files, config files, etc. where you
  51  * know your application will be the only one writing the file.
  52  * Full-scale XML parsers should be able to parse the subset used by
  53  * GMarkup, so you can easily migrate to full-scale XML at a later
  54  * time if the need arises.
  55  *
  56  * GMarkup is not guaranteed to signal an error on all invalid XML;
  57  * the parser may accept documents that an XML parser would not.
 * However, XML documents which are not well-formed (a weaker
 * condition than being valid; see the
 * [XML specification](http://www.w3.org/TR/REC-xml/)
 * for definitions of these terms) are not considered valid GMarkup
 * documents.
  63  *
  64  * Simplifications to XML include:
  65  *
  66  * - Only UTF-8 encoding is allowed
  67  *
  68  * - No user-defined entities
  69  *
  70  * - Processing instructions, comments and the doctype declaration
  71  *   are "passed through" but are not interpreted in any way
  72  *
  73  * - No DTD or validation
  74  *
  75  * The markup format does support:
  76  *
  77  * - Elements
  78  *
  79  * - Attributes
  80  *
  81  * - 5 standard entities: &amp; &lt; &gt; &quot; &apos;
  82  *
  83  * - Character references
  84  *
  85  * - Sections marked as CDATA
  86  */
  87
/* Defines the quark accessor g_markup_error_quark() for the string
 * "g-markup-error-quark".
 * NOTE(review): presumably this is what G_MARKUP_ERROR (used as the
 * GError domain throughout this file) expands to - confirm in gmarkup.h.
 */
G_DEFINE_QUARK (g-markup-error-quark, g_markup_error)
  89
/* Parser states, stored in GMarkupParseContext.state.
 *
 * A new context starts in STATE_START.  mark_error() moves the context
 * to STATE_ERROR.  unescape_gstring_inplace() normalises whitespace
 * only while the state is one of the two INSIDE_ATTRIBUTE_VALUE states
 * (_SQ / _DQ, presumably single- and double-quoted values).
 */
typedef enum
{
  STATE_START,
  STATE_AFTER_OPEN_ANGLE,
  STATE_AFTER_CLOSE_ANGLE,
  STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
  STATE_INSIDE_OPEN_TAG_NAME,
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_AFTER_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH,
  STATE_INSIDE_CLOSE_TAG_NAME,
  STATE_AFTER_CLOSE_TAG_NAME,
  STATE_INSIDE_PASSTHROUGH,
  STATE_ERROR
} GMarkupParseState;
 110
/* One entry of GMarkupParseContext.subparser_stack: the parser state
 * that pop_subparser_stack() copies back into the context (element
 * marker, parser vtable and user_data) before freeing the entry.
 */
typedef struct
{
  const char *prev_element;         /* restored into context->subparser_element */
  const GMarkupParser *prev_parser; /* restored into context->parser */
  gpointer prev_user_data;          /* restored into context->user_data */
} GMarkupRecursionTracker;
 117
 118 struct _GMarkupParseContext
 119 {
 120   const GMarkupParser *parser;
 121
 122   volatile gint ref_count;
 123
 124   GMarkupParseFlags flags;
 125
 126   gint line_number;
 127   gint char_number;
 128
 129   GMarkupParseState state;
 130
 131   gpointer user_data;
 132   GDestroyNotify dnotify;
 133
 134   /* A piece of character data or an element that
 135    * hasn't "ended" yet so we haven't yet called
 136    * the callback for it.
 137    */
 138   GString *partial_chunk;
 139   GSList *spare_chunks;
 140
 141   GSList *tag_stack;
 142   GSList *tag_stack_gstr;
 143   GSList *spare_list_nodes;
 144
 145   GString **attr_names;
 146   GString **attr_values;
 147   gint cur_attr;
 148   gint alloc_attrs;
 149
 150   const gchar *current_text;
 151   gssize       current_text_len;
 152   const gchar *current_text_end;
 153
 154   /* used to save the start of the last interesting thingy */
 155   const gchar *start;
 156
 157   const gchar *iter;
 158
 159   guint document_empty : 1;
 160   guint parsing : 1;
 161   guint awaiting_pop : 1;
 162   gint balance;
 163
 164   /* subparser support */
 165   GSList *subparser_stack; /* (GMarkupRecursionTracker *) */
 166   const char *subparser_element;
 167   gpointer held_user_data;
 168 };
 169
 170 /*
 171  * Helpers to reduce our allocation overhead, we have
 172  * a well defined allocation lifecycle.
 173  */
 174 static GSList *
 175 get_list_node (GMarkupParseContext *context, gpointer data)
 176 {
 177   GSList *node;
 178   if (context->spare_list_nodes != NULL)
 179     {
 180       node = context->spare_list_nodes;
 181       context->spare_list_nodes = g_slist_remove_link (context->spare_list_nodes, node);
 182     }
 183   else
 184     node = g_slist_alloc();
 185   node->data = data;
 186   return node;
 187 }
 188
 189 static void
 190 free_list_node (GMarkupParseContext *context, GSList *node)
 191 {
 192   node->data = NULL;
 193   context->spare_list_nodes = g_slist_concat (node, context->spare_list_nodes);
 194 }
 195
 196 static inline void
 197 string_blank (GString *string)
 198 {
 199   string->str[0] = '\0';
 200   string->len = 0;
 201 }
 202
 203 /**
 204  * g_markup_parse_context_new:
 205  * @parser: a #GMarkupParser
 206  * @flags: one or more #GMarkupParseFlags
 207  * @user_data: user data to pass to #GMarkupParser functions
 208  * @user_data_dnotify: user data destroy notifier called when
 209  *     the parse context is freed
 210  *
 211  * Creates a new parse context. A parse context is used to parse
 212  * marked-up documents. You can feed any number of documents into
 213  * a context, as long as no errors occur; once an error occurs,
 214  * the parse context can't continue to parse text (you have to
 215  * free it and create a new parse context).
 216  *
 217  * Returns: a new #GMarkupParseContext
 218  **/
 219 GMarkupParseContext *
 220 g_markup_parse_context_new (const GMarkupParser *parser,
 221                             GMarkupParseFlags    flags,
 222                             gpointer             user_data,
 223                             GDestroyNotify       user_data_dnotify)
 224 {
 225   GMarkupParseContext *context;
 226
 227   g_return_val_if_fail (parser != NULL, NULL);
 228
 229   context = g_new (GMarkupParseContext, 1);
 230
 231   context->ref_count = 1;
 232   context->parser = parser;
 233   context->flags = flags;
 234   context->user_data = user_data;
 235   context->dnotify = user_data_dnotify;
 236
 237   context->line_number = 1;
 238   context->char_number = 1;
 239
 240   context->partial_chunk = NULL;
 241   context->spare_chunks = NULL;
 242   context->spare_list_nodes = NULL;
 243
 244   context->state = STATE_START;
 245   context->tag_stack = NULL;
 246   context->tag_stack_gstr = NULL;
 247   context->attr_names = NULL;
 248   context->attr_values = NULL;
 249   context->cur_attr = -1;
 250   context->alloc_attrs = 0;
 251
 252   context->current_text = NULL;
 253   context->current_text_len = -1;
 254   context->current_text_end = NULL;
 255
 256   context->start = NULL;
 257   context->iter = NULL;
 258
 259   context->document_empty = TRUE;
 260   context->parsing = FALSE;
 261
 262   context->awaiting_pop = FALSE;
 263   context->subparser_stack = NULL;
 264   context->subparser_element = NULL;
 265
 266   /* this is only looked at if awaiting_pop = TRUE.  initialise anyway. */
 267   context->held_user_data = NULL;
 268
 269   context->balance = 0;
 270
 271   return context;
 272 }
 273
 274 /**
 275  * g_markup_parse_context_ref:
 276  * @context: a #GMarkupParseContext
 277  *
 278  * Increases the reference count of @context.
 279  *
 280  * Returns: the same @context
 281  *
 282  * Since: 2.36
 283  **/
 284 GMarkupParseContext *
 285 g_markup_parse_context_ref (GMarkupParseContext *context)
 286 {
 287   g_return_val_if_fail (context != NULL, NULL);
 288   g_return_val_if_fail (context->ref_count > 0, NULL);
 289
 290   g_atomic_int_inc (&context->ref_count);
 291
 292   return context;
 293 }
 294
 295 /**
 296  * g_markup_parse_context_unref:
 297  * @context: a #GMarkupParseContext
 298  *
 299  * Decreases the reference count of @context.  When its reference count
 300  * drops to 0, it is freed.
 301  *
 302  * Since: 2.36
 303  **/
 304 void
 305 g_markup_parse_context_unref (GMarkupParseContext *context)
 306 {
 307   g_return_if_fail (context != NULL);
 308   g_return_if_fail (context->ref_count > 0);
 309
 310   if (g_atomic_int_dec_and_test (&context->ref_count))
 311     g_markup_parse_context_free (context);
 312 }
 313
 314 static void
 315 string_full_free (gpointer ptr)
 316 {
 317   g_string_free (ptr, TRUE);
 318 }
 319
 320 static void clear_attributes (GMarkupParseContext *context);
 321
 322 /**
 323  * g_markup_parse_context_free:
 324  * @context: a #GMarkupParseContext
 325  *
 326  * Frees a #GMarkupParseContext.
 327  *
 328  * This function can't be called from inside one of the
 329  * #GMarkupParser functions or while a subparser is pushed.
 330  */
 331 void
 332 g_markup_parse_context_free (GMarkupParseContext *context)
 333 {
 334   g_return_if_fail (context != NULL);
 335   g_return_if_fail (!context->parsing);
 336   g_return_if_fail (!context->subparser_stack);
 337   g_return_if_fail (!context->awaiting_pop);
 338
 339   if (context->dnotify)
 340     (* context->dnotify) (context->user_data);
 341
 342   clear_attributes (context);
 343   g_free (context->attr_names);
 344   g_free (context->attr_values);
 345
 346   g_slist_free_full (context->tag_stack_gstr, string_full_free);
 347   g_slist_free (context->tag_stack);
 348
 349   g_slist_free_full (context->spare_chunks, string_full_free);
 350   g_slist_free (context->spare_list_nodes);
 351
 352   if (context->partial_chunk)
 353     g_string_free (context->partial_chunk, TRUE);
 354
 355   g_free (context);
 356 }
 357
 358 static void pop_subparser_stack (GMarkupParseContext *context);
 359
 360 static void
 361 mark_error (GMarkupParseContext *context,
 362             GError              *error)
 363 {
 364   context->state = STATE_ERROR;
 365
 366   if (context->parser->error)
 367     (*context->parser->error) (context, error, context->user_data);
 368
 369   /* report the error all the way up to free all the user-data */
 370   while (context->subparser_stack)
 371     {
 372       pop_subparser_stack (context);
 373       context->awaiting_pop = FALSE; /* already been freed */
 374
 375       if (context->parser->error)
 376         (*context->parser->error) (context, error, context->user_data);
 377     }
 378 }
 379
 380 static void
 381 set_error (GMarkupParseContext  *context,
 382            GError              **error,
 383            GMarkupError          code,
 384            const gchar          *format,
 385            ...) G_GNUC_PRINTF (4, 5);
 386
 387 static void
 388 set_error_literal (GMarkupParseContext  *context,
 389                    GError              **error,
 390                    GMarkupError          code,
 391                    const gchar          *message)
 392 {
 393   GError *tmp_error;
 394
 395   tmp_error = g_error_new_literal (G_MARKUP_ERROR, code, message);
 396
 397   g_prefix_error (&tmp_error,
 398                   _("Error on line %d char %d: "),
 399                   context->line_number,
 400                   context->char_number);
 401
 402   mark_error (context, tmp_error);
 403
 404   g_propagate_error (error, tmp_error);
 405 }
 406
 407 G_GNUC_PRINTF(4, 5)
 408 static void
 409 set_error (GMarkupParseContext  *context,
 410            GError              **error,
 411            GMarkupError          code,
 412            const gchar          *format,
 413            ...)
 414 {
 415   gchar *s;
 416   gchar *s_valid;
 417   va_list args;
 418
 419   va_start (args, format);
 420   s = g_strdup_vprintf (format, args);
 421   va_end (args);
 422
 423   /* Make sure that the GError message is valid UTF-8
 424    * even if it is complaining about invalid UTF-8 in the markup
 425    */
 426   s_valid = _g_utf8_make_valid (s);
 427   set_error_literal (context, error, code, s);
 428
 429   g_free (s);
 430   g_free (s_valid);
 431 }
 432
 433 static void
 434 propagate_error (GMarkupParseContext  *context,
 435                  GError              **dest,
 436                  GError               *src)
 437 {
 438   if (context->flags & G_MARKUP_PREFIX_ERROR_POSITION)
 439     g_prefix_error (&src,
 440                     _("Error on line %d char %d: "),
 441                     context->line_number,
 442                     context->char_number);
 443
 444   mark_error (context, src);
 445
 446   g_propagate_error (dest, src);
 447 }
 448
/* Non-zero when @c is one of '=', '/', '>' or ' '.  Used by the name
 * validators to reject these characters and by advance_to_name_end()
 * to find where a name stops.  The argument is evaluated several
 * times, so it must not have side effects.
 */
#define IS_COMMON_NAME_END_CHAR(c) \
  ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
 451
 452 static gboolean
 453 slow_name_validate (GMarkupParseContext  *context,
 454                     const gchar          *name,
 455                     GError              **error)
 456 {
 457   const gchar *p = name;
 458
 459   if (!g_utf8_validate (name, strlen (name), NULL))
 460     {
 461       set_error (context, error, G_MARKUP_ERROR_BAD_UTF8,
 462                  _("Invalid UTF-8 encoded text in name - not valid '%s'"), name);
 463       return FALSE;
 464     }
 465
 466   if (!(g_ascii_isalpha (*p) ||
 467         (!IS_COMMON_NAME_END_CHAR (*p) &&
 468          (*p == '_' ||
 469           *p == ':' ||
 470           g_unichar_isalpha (g_utf8_get_char (p))))))
 471     {
 472       set_error (context, error, G_MARKUP_ERROR_PARSE,
 473                  _("'%s' is not a valid name"), name);
 474       return FALSE;
 475     }
 476
 477   for (p = g_utf8_next_char (name); *p != '\0'; p = g_utf8_next_char (p))
 478     {
 479       /* is_name_char */
 480       if (!(g_ascii_isalnum (*p) ||
 481             (!IS_COMMON_NAME_END_CHAR (*p) &&
 482              (*p == '.' ||
 483               *p == '-' ||
 484               *p == '_' ||
 485               *p == ':' ||
 486               g_unichar_isalpha (g_utf8_get_char (p))))))
 487         {
 488           set_error (context, error, G_MARKUP_ERROR_PARSE,
 489                      _("'%s' is not a valid name: '%c'"), name, *p);
 490           return FALSE;
 491         }
 492     }
 493   return TRUE;
 494 }
 495
 496 /*
 497  * Use me for elements, attributes etc.
 498  */
 499 static gboolean
 500 name_validate (GMarkupParseContext  *context,
 501                const gchar          *name,
 502                GError              **error)
 503 {
 504   char mask;
 505   const char *p;
 506
 507   /* name start char */
 508   p = name;
 509   if (G_UNLIKELY (IS_COMMON_NAME_END_CHAR (*p) ||
 510                   !(g_ascii_isalpha (*p) || *p == '_' || *p == ':')))
 511     goto slow_validate;
 512
 513   for (mask = *p++; *p != '\0'; p++)
 514     {
 515       mask |= *p;
 516
 517       /* is_name_char */
 518       if (G_UNLIKELY (!(g_ascii_isalnum (*p) ||
 519                         (!IS_COMMON_NAME_END_CHAR (*p) &&
 520                          (*p == '.' ||
 521                           *p == '-' ||
 522                           *p == '_' ||
 523                           *p == ':')))))
 524         goto slow_validate;
 525     }
 526
 527   if (mask & 0x80) /* un-common / non-ascii */
 528     goto slow_validate;
 529
 530   return TRUE;
 531
 532  slow_validate:
 533   return slow_name_validate (context, name, error);
 534 }
 535
 536 static gboolean
 537 text_validate (GMarkupParseContext  *context,
 538                const gchar          *p,
 539                gint                  len,
 540                GError              **error)
 541 {
 542   if (!g_utf8_validate (p, len, NULL))
 543     {
 544       set_error (context, error, G_MARKUP_ERROR_BAD_UTF8,
 545                  _("Invalid UTF-8 encoded text in name - not valid '%s'"), p);
 546       return FALSE;
 547     }
 548   else
 549     return TRUE;
 550 }
 551
 552 static gchar*
 553 char_str (gunichar c,
 554           gchar   *buf)
 555 {
 556   memset (buf, 0, 8);
 557   g_unichar_to_utf8 (c, buf);
 558   return buf;
 559 }
 560
 561 static gchar*
 562 utf8_str (const gchar *utf8,
 563           gchar       *buf)
 564 {
 565   char_str (g_utf8_get_char (utf8), buf);
 566   return buf;
 567 }
 568
 569 G_GNUC_PRINTF(5, 6)
 570 static void
 571 set_unescape_error (GMarkupParseContext  *context,
 572                     GError              **error,
 573                     const gchar          *remaining_text,
 574                     GMarkupError          code,
 575                     const gchar          *format,
 576                     ...)
 577 {
 578   GError *tmp_error;
 579   gchar *s;
 580   va_list args;
 581   gint remaining_newlines;
 582   const gchar *p;
 583
 584   remaining_newlines = 0;
 585   p = remaining_text;
 586   while (*p != '\0')
 587     {
 588       if (*p == '\n')
 589         ++remaining_newlines;
 590       ++p;
 591     }
 592
 593   va_start (args, format);
 594   s = g_strdup_vprintf (format, args);
 595   va_end (args);
 596
 597   tmp_error = g_error_new (G_MARKUP_ERROR,
 598                            code,
 599                            _("Error on line %d: %s"),
 600                            context->line_number - remaining_newlines,
 601                            s);
 602
 603   g_free (s);
 604
 605   mark_error (context, tmp_error);
 606
 607   g_propagate_error (error, tmp_error);
 608 }
 609
 610 /*
 611  * re-write the GString in-place, unescaping anything that escaped.
 612  * most XML does not contain entities, or escaping.
 613  */
 614 static gboolean
 615 unescape_gstring_inplace (GMarkupParseContext  *context,
 616                           GString              *string,
 617                           gboolean             *is_ascii,
 618                           GError              **error)
 619 {
 620   char mask, *to;
 621   int line_num = 1;
 622   const char *from;
 623   gboolean normalize_attribute;
 624
 625   *is_ascii = FALSE;
 626
 627   /* are we unescaping an attribute or not ? */
 628   if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ ||
 629       context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
 630     normalize_attribute = TRUE;
 631   else
 632     normalize_attribute = FALSE;
 633
 634   /*
 635    * Meeks' theorem: unescaping can only shrink text.
 636    * for &lt; etc. this is obvious, for &#xffff; more
 637    * thought is required, but this is patently so.
 638    */
 639   mask = 0;
 640   for (from = to = string->str; *from != '\0'; from++, to++)
 641     {
 642       *to = *from;
 643
 644       mask |= *to;
 645       if (*to == '\n')
 646         line_num++;
 647       if (normalize_attribute && (*to == '\t' || *to == '\n'))
 648         *to = ' ';
 649       if (*to == '\r')
 650         {
 651           *to = normalize_attribute ? ' ' : '\n';
 652           if (from[1] == '\n')
 653             from++;
 654         }
 655       if (*from == '&')
 656         {
 657           from++;
 658           if (*from == '#')
 659             {
 660               gboolean is_hex = FALSE;
 661               gulong l;
 662               gchar *end = NULL;
 663
 664               from++;
 665
 666               if (*from == 'x')
 667                 {
 668                   is_hex = TRUE;
 669                   from++;
 670                 }
 671
 672               /* digit is between start and p */
 673               errno = 0;
 674               if (is_hex)
 675                 l = strtoul (from, &end, 16);
 676               else
 677                 l = strtoul (from, &end, 10);
 678
 679               if (end == from || errno != 0)
 680                 {
 681                   set_unescape_error (context, error,
 682                                       from, G_MARKUP_ERROR_PARSE,
 683                                       _("Failed to parse '%-.*s', which "
 684                                         "should have been a digit "
 685                                         "inside a character reference "
 686                                         "(&#234; for example) - perhaps "
 687                                         "the digit is too large"),
 688                                       (int)(end - from), from);
 689                   return FALSE;
 690                 }
 691               else if (*end != ';')
 692                 {
 693                   set_unescape_error (context, error,
 694                                       from, G_MARKUP_ERROR_PARSE,
 695                                       _("Character reference did not end with a "
 696                                         "semicolon; "
 697                                         "most likely you used an ampersand "
 698                                         "character without intending to start "
 699                                         "an entity - escape ampersand as &amp;"));
 700                   return FALSE;
 701                 }
 702               else
 703                 {
 704                   /* characters XML 1.1 permits */
 705                   if ((0 < l && l <= 0xD7FF) ||
 706                       (0xE000 <= l && l <= 0xFFFD) ||
 707                       (0x10000 <= l && l <= 0x10FFFF))
 708                     {
 709                       gchar buf[8];
 710                       char_str (l, buf);
 711                       strcpy (to, buf);
 712                       to += strlen (buf) - 1;
 713                       from = end;
 714                       if (l >= 0x80) /* not ascii */
 715                         mask |= 0x80;
 716                     }
 717                   else
 718                     {
 719                       set_unescape_error (context, error,
 720                                           from, G_MARKUP_ERROR_PARSE,
 721                                           _("Character reference '%-.*s' does not "
 722                                             "encode a permitted character"),
 723                                           (int)(end - from), from);
 724                       return FALSE;
 725                     }
 726                 }
 727             }
 728
 729           else if (strncmp (from, "lt;", 3) == 0)
 730             {
 731               *to = '<';
 732               from += 2;
 733             }
 734           else if (strncmp (from, "gt;", 3) == 0)
 735             {
 736               *to = '>';
 737               from += 2;
 738             }
 739           else if (strncmp (from, "amp;", 4) == 0)
 740             {
 741               *to = '&';
 742               from += 3;
 743             }
 744           else if (strncmp (from, "quot;", 5) == 0)
 745             {
 746               *to = '"';
 747               from += 4;
 748             }
 749           else if (strncmp (from, "apos;", 5) == 0)
 750             {
 751               *to = '\'';
 752               from += 4;
 753             }
 754           else
 755             {
 756               if (*from == ';')
 757                 set_unescape_error (context, error,
 758                                     from, G_MARKUP_ERROR_PARSE,
 759                                     _("Empty entity '&;' seen; valid "
 760                                       "entities are: &amp; &quot; &lt; &gt; &apos;"));
 761               else
 762                 {
 763                   const char *end = strchr (from, ';');
 764                   if (end)
 765                     set_unescape_error (context, error,
 766                                         from, G_MARKUP_ERROR_PARSE,
 767                                         _("Entity name '%-.*s' is not known"),
 768                                         (int)(end - from), from);
 769                   else
 770                     set_unescape_error (context, error,
 771                                         from, G_MARKUP_ERROR_PARSE,
 772                                         _("Entity did not end with a semicolon; "
 773                                           "most likely you used an ampersand "
 774                                           "character without intending to start "
 775                                           "an entity - escape ampersand as &amp;"));
 776                 }
 777               return FALSE;
 778             }
 779         }
 780     }
 781
 782   g_assert (to - string->str <= string->len);
 783   if (to - string->str != string->len)
 784     g_string_truncate (string, to - string->str);
 785
 786   *is_ascii = !(mask & 0x80);
 787
 788   return TRUE;
 789 }
 790
 791 static inline gboolean
 792 advance_char (GMarkupParseContext *context)
 793 {
 794   context->iter++;
 795   context->char_number++;
 796
 797   if (G_UNLIKELY (context->iter == context->current_text_end))
 798       return FALSE;
 799
 800   else if (G_UNLIKELY (*context->iter == '\n'))
 801     {
 802       context->line_number++;
 803       context->char_number = 1;
 804     }
 805
 806   return TRUE;
 807 }
 808
 809 static inline gboolean
 810 xml_isspace (char c)
 811 {
 812   return c == ' ' || c == '\t' || c == '\n' || c == '\r';
 813 }
 814
 815 static void
 816 skip_spaces (GMarkupParseContext *context)
 817 {
 818   do
 819     {
 820       if (!xml_isspace (*context->iter))
 821         return;
 822     }
 823   while (advance_char (context));
 824 }
 825
 826 static void
 827 advance_to_name_end (GMarkupParseContext *context)
 828 {
 829   do
 830     {
 831       if (IS_COMMON_NAME_END_CHAR (*(context->iter)))
 832         return;
 833       if (xml_isspace (*(context->iter)))
 834         return;
 835     }
 836   while (advance_char (context));
 837 }
 838
 839 static void
 840 release_chunk (GMarkupParseContext *context, GString *str)
 841 {
 842   GSList *node;
 843   if (!str)
 844     return;
 845   if (str->allocated_len > 256)
 846     { /* large strings are unusual and worth freeing */
 847       g_string_free (str, TRUE);
 848       return;
 849     }
 850   string_blank (str);
 851   node = get_list_node (context, str);
 852   context->spare_chunks = g_slist_concat (node, context->spare_chunks);
 853 }
 854
 855 static void
 856 add_to_partial (GMarkupParseContext *context,
 857                 const gchar         *text_start,
 858                 const gchar         *text_end)
 859 {
 860   if (context->partial_chunk == NULL)
 861     { /* allocate a new chunk to parse into */
 862
 863       if (context->spare_chunks != NULL)
 864         {
 865           GSList *node = context->spare_chunks;
 866           context->spare_chunks = g_slist_remove_link (context->spare_chunks, node);
 867           context->partial_chunk = node->data;
 868           free_list_node (context, node);
 869         }
 870       else
 871         context->partial_chunk = g_string_sized_new (MAX (28, text_end - text_start));
 872     }
 873
 874   if (text_start != text_end)
 875     g_string_insert_len (context->partial_chunk, -1,
 876                          text_start, text_end - text_start);
 877 }
 878
 879 static inline void
 880 truncate_partial (GMarkupParseContext *context)
 881 {
 882   if (context->partial_chunk != NULL)
 883     string_blank (context->partial_chunk);
 884 }
 885
 886 static inline const gchar*
 887 current_element (GMarkupParseContext *context)
 888 {
 889   return context->tag_stack->data;
 890 }
 891
 892 static void
 893 pop_subparser_stack (GMarkupParseContext *context)
 894 {
 895   GMarkupRecursionTracker *tracker;
 896
 897   g_assert (context->subparser_stack);
 898
 899   tracker = context->subparser_stack->data;
 900
 901   context->awaiting_pop = TRUE;
 902   context->held_user_data = context->user_data;
 903
 904   context->user_data = tracker->prev_user_data;
 905   context->parser = tracker->prev_parser;
 906   context->subparser_element = tracker->prev_element;
 907   g_slice_free (GMarkupRecursionTracker, tracker);
 908
 909   context->subparser_stack = g_slist_delete_link (context->subparser_stack,
 910                                                   context->subparser_stack);
 911 }
 912
 913 static void
 914 push_partial_as_tag (GMarkupParseContext *context)
 915 {
 916   GString *str = context->partial_chunk;
 917   /* sadly, this is exported by gmarkup_get_element_stack as-is */
 918   context->tag_stack = g_slist_concat (get_list_node (context, str->str), context->tag_stack);
 919   context->tag_stack_gstr = g_slist_concat (get_list_node (context, str), context->tag_stack_gstr);
 920   context->partial_chunk = NULL;
 921 }
 922
 923 static void
 924 pop_tag (GMarkupParseContext *context)
 925 {
 926   GSList *nodea, *nodeb;
 927
 928   nodea = context->tag_stack;
 929   nodeb = context->tag_stack_gstr;
 930   release_chunk (context, nodeb->data);
 931   context->tag_stack = g_slist_remove_link (context->tag_stack, nodea);
 932   context->tag_stack_gstr = g_slist_remove_link (context->tag_stack_gstr, nodeb);
 933   free_list_node (context, nodea);
 934   free_list_node (context, nodeb);
 935 }
 936
 937 static void
 938 possibly_finish_subparser (GMarkupParseContext *context)
 939 {
 940   if (current_element (context) == context->subparser_element)
 941     pop_subparser_stack (context);
 942 }
 943
 944 static void
 945 ensure_no_outstanding_subparser (GMarkupParseContext *context)
 946 {
 947   if (context->awaiting_pop)
 948     g_critical ("During the first end_element call after invoking a "
 949                 "subparser you must pop the subparser stack and handle "
 950                 "the freeing of the subparser user_data.  This can be "
 951                 "done by calling the end function of the subparser.  "
 952                 "Very probably, your program just leaked memory.");
 953
 954   /* let valgrind watch the pointer disappear... */
 955   context->held_user_data = NULL;
 956   context->awaiting_pop = FALSE;
 957 }
 958
 959 static const gchar*
 960 current_attribute (GMarkupParseContext *context)
 961 {
 962   g_assert (context->cur_attr >= 0);
 963   return context->attr_names[context->cur_attr]->str;
 964 }
 965
 966 static void
 967 add_attribute (GMarkupParseContext *context, GString *str)
 968 {
 969   if (context->cur_attr + 2 >= context->alloc_attrs)
 970     {
 971       context->alloc_attrs += 5; /* silly magic number */
 972       context->attr_names = g_realloc (context->attr_names, sizeof(GString*)*context->alloc_attrs);
 973       context->attr_values = g_realloc (context->attr_values, sizeof(GString*)*context->alloc_attrs);
 974     }
 975   context->cur_attr++;
 976   context->attr_names[context->cur_attr] = str;
 977   context->attr_values[context->cur_attr] = NULL;
 978   context->attr_names[context->cur_attr+1] = NULL;
 979   context->attr_values[context->cur_attr+1] = NULL;
 980 }
 981
 982 static void
 983 clear_attributes (GMarkupParseContext *context)
 984 {
 985   /* Go ahead and free the attributes. */
 986   for (; context->cur_attr >= 0; context->cur_attr--)
 987     {
 988       int pos = context->cur_attr;
 989       release_chunk (context, context->attr_names[pos]);
 990       release_chunk (context, context->attr_values[pos]);
 991       context->attr_names[pos] = context->attr_values[pos] = NULL;
 992     }
 993   g_assert (context->cur_attr == -1);
 994   g_assert (context->attr_names == NULL ||
 995             context->attr_names[0] == NULL);
 996   g_assert (context->attr_values == NULL ||
 997             context->attr_values[0] == NULL);
 998 }
 999
1000 /* This has to be a separate function to ensure the alloca's
1001  * are unwound on exit - otherwise we grow & blow the stack
1002  * with large documents
1003  */
1004 static inline void
1005 emit_start_element (GMarkupParseContext  *context,
1006                     GError              **error)
1007 {
1008   int i, j = 0;
1009   const gchar *start_name;
1010   const gchar **attr_names;
1011   const gchar **attr_values;
1012   GError *tmp_error;
1013
1014   /* In case we want to ignore qualified tags and we see that we have
1015    * one here, we push a subparser.  This will ignore all tags inside of
1016    * the qualified tag.
1017    *
1018    * We deal with the end of the subparser from emit_end_element.
1019    */
1020   if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (current_element (context), ':'))
1021     {
1022       static const GMarkupParser ignore_parser;
1023       g_markup_parse_context_push (context, &ignore_parser, NULL);
1024       clear_attributes (context);
1025       return;
1026     }
1027
1028   attr_names = g_newa (const gchar *, context->cur_attr + 2);
1029   attr_values = g_newa (const gchar *, context->cur_attr + 2);
1030   for (i = 0; i < context->cur_attr + 1; i++)
1031     {
1032       /* Possibly omit qualified attribute names from the list */
1033       if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (context->attr_names[i]->str, ':'))
1034         continue;
1035
1036       attr_names[j] = context->attr_names[i]->str;
1037       attr_values[j] = context->attr_values[i]->str;
1038       j++;
1039     }
1040   attr_names[j] = NULL;
1041   attr_values[j] = NULL;
1042
1043   /* Call user callback for element start */
1044   tmp_error = NULL;
1045   start_name = current_element (context);
1046
1047   if (context->parser->start_element &&
1048       name_validate (context, start_name, error))
1049     (* context->parser->start_element) (context,
1050                                         start_name,
1051                                         (const gchar **)attr_names,
1052                                         (const gchar **)attr_values,
1053                                         context->user_data,
1054                                         &tmp_error);
1055   clear_attributes (context);
1056
1057   if (tmp_error != NULL)
1058     propagate_error (context, error, tmp_error);
1059 }
1060
1061 static void
1062 emit_end_element (GMarkupParseContext  *context,
1063                   GError              **error)
1064 {
1065   /* We need to pop the tag stack and call the end_element
1066    * function, since this is the close tag
1067    */
1068   GError *tmp_error = NULL;
1069
1070   g_assert (context->tag_stack != NULL);
1071
1072   possibly_finish_subparser (context);
1073
1074   /* We might have just returned from our ignore subparser */
1075   if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (current_element (context), ':'))
1076     {
1077       g_markup_parse_context_pop (context);
1078       pop_tag (context);
1079       return;
1080     }
1081
1082   tmp_error = NULL;
1083   if (context->parser->end_element)
1084     (* context->parser->end_element) (context,
1085                                       current_element (context),
1086                                       context->user_data,
1087                                       &tmp_error);
1088
1089   ensure_no_outstanding_subparser (context);
1090
1091   if (tmp_error)
1092     {
1093       mark_error (context, tmp_error);
1094       g_propagate_error (error, tmp_error);
1095     }
1096
1097   pop_tag (context);
1098 }
1099
1100 /**
1101  * g_markup_parse_context_parse:
1102  * @context: a #GMarkupParseContext
1103  * @text: chunk of text to parse
1104  * @text_len: length of @text in bytes
1105  * @error: return location for a #GError
1106  *
1107  * Feed some data to the #GMarkupParseContext.
1108  *
1109  * The data need not be valid UTF-8; an error will be signaled if
1110  * it's invalid. The data need not be an entire document; you can
1111  * feed a document into the parser incrementally, via multiple calls
1112  * to this function. Typically, as you receive data from a network
1113  * connection or file, you feed each received chunk of data into this
1114  * function, aborting the process if an error occurs. Once an error
1115  * is reported, no further data may be fed to the #GMarkupParseContext;
1116  * all errors are fatal.
1117  *
1118  * Returns: %FALSE if an error occurred, %TRUE on success
1119  */
1120 gboolean
1121 g_markup_parse_context_parse (GMarkupParseContext  *context,
1122                               const gchar          *text,
1123                               gssize                text_len,
1124                               GError              **error)
1125 {
1126   g_return_val_if_fail (context != NULL, FALSE);
1127   g_return_val_if_fail (text != NULL, FALSE);
1128   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1129   g_return_val_if_fail (!context->parsing, FALSE);
1130
1131   if (text_len < 0)
1132     text_len = strlen (text);
1133
1134   if (text_len == 0)
1135     return TRUE;
1136
1137   context->parsing = TRUE;
1138
1139
1140   context->current_text = text;
1141   context->current_text_len = text_len;
1142   context->current_text_end = context->current_text + text_len;
1143   context->iter = context->current_text;
1144   context->start = context->iter;
1145
1146   while (context->iter != context->current_text_end)
1147     {
1148       switch (context->state)
1149         {
1150         case STATE_START:
1151           /* Possible next state: AFTER_OPEN_ANGLE */
1152
1153           g_assert (context->tag_stack == NULL);
1154
1155           /* whitespace is ignored outside of any elements */
1156           skip_spaces (context);
1157
1158           if (context->iter != context->current_text_end)
1159             {
1160               if (*context->iter == '<')
1161                 {
1162                   /* Move after the open angle */
1163                   advance_char (context);
1164
1165                   context->state = STATE_AFTER_OPEN_ANGLE;
1166
1167                   /* this could start a passthrough */
1168                   context->start = context->iter;
1169
1170                   /* document is now non-empty */
1171                   context->document_empty = FALSE;
1172                 }
1173               else
1174                 {
1175                   set_error_literal (context,
1176                                      error,
1177                                      G_MARKUP_ERROR_PARSE,
1178                                      _("Document must begin with an element (e.g. <book>)"));
1179                 }
1180             }
1181           break;
1182
1183         case STATE_AFTER_OPEN_ANGLE:
1184           /* Possible next states: INSIDE_OPEN_TAG_NAME,
1185            *  AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
1186            */
1187           if (*context->iter == '?' ||
1188               *context->iter == '!')
1189             {
1190               /* include < in the passthrough */
1191               const gchar *openangle = "<";
1192               add_to_partial (context, openangle, openangle + 1);
1193               context->start = context->iter;
1194               context->balance = 1;
1195               context->state = STATE_INSIDE_PASSTHROUGH;
1196             }
1197           else if (*context->iter == '/')
1198             {
1199               /* move after it */
1200               advance_char (context);
1201
1202               context->state = STATE_AFTER_CLOSE_TAG_SLASH;
1203             }
1204           else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1205             {
1206               context->state = STATE_INSIDE_OPEN_TAG_NAME;
1207
1208               /* start of tag name */
1209               context->start = context->iter;
1210             }
1211           else
1212             {
1213               gchar buf[8];
1214
1215               set_error (context,
1216                          error,
1217                          G_MARKUP_ERROR_PARSE,
1218                          _("'%s' is not a valid character following "
1219                            "a '<' character; it may not begin an "
1220                            "element name"),
1221                          utf8_str (context->iter, buf));
1222             }
1223           break;
1224
1225           /* The AFTER_CLOSE_ANGLE state is actually sort of
1226            * broken, because it doesn't correspond to a range
1227            * of characters in the input stream as the others do,
1228            * and thus makes things harder to conceptualize
1229            */
1230         case STATE_AFTER_CLOSE_ANGLE:
1231           /* Possible next states: INSIDE_TEXT, STATE_START */
1232           if (context->tag_stack == NULL)
1233             {
1234               context->start = NULL;
1235               context->state = STATE_START;
1236             }
1237           else
1238             {
1239               context->start = context->iter;
1240               context->state = STATE_INSIDE_TEXT;
1241             }
1242           break;
1243
1244         case STATE_AFTER_ELISION_SLASH:
1245           /* Possible next state: AFTER_CLOSE_ANGLE */
1246           if (*context->iter == '>')
1247             {
1248               /* move after the close angle */
1249               advance_char (context);
1250               context->state = STATE_AFTER_CLOSE_ANGLE;
1251               emit_end_element (context, error);
1252             }
1253           else
1254             {
1255               gchar buf[8];
1256
1257               set_error (context,
1258                          error,
1259                          G_MARKUP_ERROR_PARSE,
1260                          _("Odd character '%s', expected a '>' character "
1261                            "to end the empty-element tag '%s'"),
1262                          utf8_str (context->iter, buf),
1263                          current_element (context));
1264             }
1265           break;
1266
1267         case STATE_INSIDE_OPEN_TAG_NAME:
1268           /* Possible next states: BETWEEN_ATTRIBUTES */
1269
1270           /* if there's a partial chunk then it's the first part of the
1271            * tag name. If there's a context->start then it's the start
1272            * of the tag name in current_text, the partial chunk goes
1273            * before that start though.
1274            */
1275           advance_to_name_end (context);
1276
1277           if (context->iter == context->current_text_end)
1278             {
1279               /* The name hasn't necessarily ended. Merge with
1280                * partial chunk, leave state unchanged.
1281                */
1282               add_to_partial (context, context->start, context->iter);
1283             }
1284           else
1285             {
1286               /* The name has ended. Combine it with the partial chunk
1287                * if any; push it on the stack; enter next state.
1288                */
1289               add_to_partial (context, context->start, context->iter);
1290               push_partial_as_tag (context);
1291
1292               context->state = STATE_BETWEEN_ATTRIBUTES;
1293               context->start = NULL;
1294             }
1295           break;
1296
1297         case STATE_INSIDE_ATTRIBUTE_NAME:
1298           /* Possible next states: AFTER_ATTRIBUTE_NAME */
1299
1300           advance_to_name_end (context);
1301           add_to_partial (context, context->start, context->iter);
1302
1303           /* read the full name, if we enter the equals sign state
1304            * then add the attribute to the list (without the value),
1305            * otherwise store a partial chunk to be prepended later.
1306            */
1307           if (context->iter != context->current_text_end)
1308             context->state = STATE_AFTER_ATTRIBUTE_NAME;
1309           break;
1310
1311         case STATE_AFTER_ATTRIBUTE_NAME:
1312           /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1313
1314           skip_spaces (context);
1315
1316           if (context->iter != context->current_text_end)
1317             {
1318               /* The name has ended. Combine it with the partial chunk
1319                * if any; push it on the stack; enter next state.
1320                */
1321               if (!name_validate (context, context->partial_chunk->str, error))
1322                 break;
1323
1324               add_attribute (context, context->partial_chunk);
1325
1326               context->partial_chunk = NULL;
1327               context->start = NULL;
1328
1329               if (*context->iter == '=')
1330                 {
1331                   advance_char (context);
1332                   context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1333                 }
1334               else
1335                 {
1336                   gchar buf[8];
1337
1338                   set_error (context,
1339                              error,
1340                              G_MARKUP_ERROR_PARSE,
1341                              _("Odd character '%s', expected a '=' after "
1342                                "attribute name '%s' of element '%s'"),
1343                              utf8_str (context->iter, buf),
1344                              current_attribute (context),
1345                              current_element (context));
1346
1347                 }
1348             }
1349           break;
1350
1351         case STATE_BETWEEN_ATTRIBUTES:
1352           /* Possible next states: AFTER_CLOSE_ANGLE,
1353            * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1354            */
1355           skip_spaces (context);
1356
1357           if (context->iter != context->current_text_end)
1358             {
1359               if (*context->iter == '/')
1360                 {
1361                   advance_char (context);
1362                   context->state = STATE_AFTER_ELISION_SLASH;
1363                 }
1364               else if (*context->iter == '>')
1365                 {
1366                   advance_char (context);
1367                   context->state = STATE_AFTER_CLOSE_ANGLE;
1368                 }
1369               else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1370                 {
1371                   context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1372                   /* start of attribute name */
1373                   context->start = context->iter;
1374                 }
1375               else
1376                 {
1377                   gchar buf[8];
1378
1379                   set_error (context,
1380                              error,
1381                              G_MARKUP_ERROR_PARSE,
1382                              _("Odd character '%s', expected a '>' or '/' "
1383                                "character to end the start tag of "
1384                                "element '%s', or optionally an attribute; "
1385                                "perhaps you used an invalid character in "
1386                                "an attribute name"),
1387                              utf8_str (context->iter, buf),
1388                              current_element (context));
1389                 }
1390
1391               /* If we're done with attributes, invoke
1392                * the start_element callback
1393                */
1394               if (context->state == STATE_AFTER_ELISION_SLASH ||
1395                   context->state == STATE_AFTER_CLOSE_ANGLE)
1396                 emit_start_element (context, error);
1397             }
1398           break;
1399
1400         case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1401           /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
1402
1403           skip_spaces (context);
1404
1405           if (context->iter != context->current_text_end)
1406             {
1407               if (*context->iter == '"')
1408                 {
1409                   advance_char (context);
1410                   context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
1411                   context->start = context->iter;
1412                 }
1413               else if (*context->iter == '\'')
1414                 {
1415                   advance_char (context);
1416                   context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
1417                   context->start = context->iter;
1418                 }
1419               else
1420                 {
1421                   gchar buf[8];
1422
1423                   set_error (context,
1424                              error,
1425                              G_MARKUP_ERROR_PARSE,
1426                              _("Odd character '%s', expected an open quote mark "
1427                                "after the equals sign when giving value for "
1428                                "attribute '%s' of element '%s'"),
1429                              utf8_str (context->iter, buf),
1430                              current_attribute (context),
1431                              current_element (context));
1432                 }
1433             }
1434           break;
1435
1436         case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1437         case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1438           /* Possible next states: BETWEEN_ATTRIBUTES */
1439           {
1440             gchar delim;
1441
1442             if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
1443               {
1444                 delim = '\'';
1445               }
1446             else
1447               {
1448                 delim = '"';
1449               }
1450
1451             do
1452               {
1453                 if (*context->iter == delim)
1454                   break;
1455               }
1456             while (advance_char (context));
1457           }
1458           if (context->iter == context->current_text_end)
1459             {
1460               /* The value hasn't necessarily ended. Merge with
1461                * partial chunk, leave state unchanged.
1462                */
1463               add_to_partial (context, context->start, context->iter);
1464             }
1465           else
1466             {
1467               gboolean is_ascii;
1468               /* The value has ended at the quote mark. Combine it
1469                * with the partial chunk if any; set it for the current
1470                * attribute.
1471                */
1472               add_to_partial (context, context->start, context->iter);
1473
1474               g_assert (context->cur_attr >= 0);
1475
1476               if (unescape_gstring_inplace (context, context->partial_chunk, &is_ascii, error) &&
1477                   (is_ascii || text_validate (context, context->partial_chunk->str,
1478                                               context->partial_chunk->len, error)))
1479                 {
1480                   /* success, advance past quote and set state. */
1481                   context->attr_values[context->cur_attr] = context->partial_chunk;
1482                   context->partial_chunk = NULL;
1483                   advance_char (context);
1484                   context->state = STATE_BETWEEN_ATTRIBUTES;
1485                   context->start = NULL;
1486                 }
1487
1488               truncate_partial (context);
1489             }
1490           break;
1491
1492         case STATE_INSIDE_TEXT:
1493           /* Possible next states: AFTER_OPEN_ANGLE */
1494           do
1495             {
1496               if (*context->iter == '<')
1497                 break;
1498             }
1499           while (advance_char (context));
1500
1501           /* The text hasn't necessarily ended. Merge with
1502            * partial chunk, leave state unchanged.
1503            */
1504
1505           add_to_partial (context, context->start, context->iter);
1506
1507           if (context->iter != context->current_text_end)
1508             {
1509               gboolean is_ascii;
1510
1511               /* The text has ended at the open angle. Call the text
1512                * callback.
1513                */
1514               if (unescape_gstring_inplace (context, context->partial_chunk, &is_ascii, error) &&
1515                   (is_ascii || text_validate (context, context->partial_chunk->str,
1516                                               context->partial_chunk->len, error)))
1517                 {
1518                   GError *tmp_error = NULL;
1519
1520                   if (context->parser->text)
1521                     (*context->parser->text) (context,
1522                                               context->partial_chunk->str,
1523                                               context->partial_chunk->len,
1524                                               context->user_data,
1525                                               &tmp_error);
1526
1527                   if (tmp_error == NULL)
1528                     {
1529                       /* advance past open angle and set state. */
1530                       advance_char (context);
1531                       context->state = STATE_AFTER_OPEN_ANGLE;
1532                       /* could begin a passthrough */
1533                       context->start = context->iter;
1534                     }
1535                   else
1536                     propagate_error (context, error, tmp_error);
1537                 }
1538
1539               truncate_partial (context);
1540             }
1541           break;
1542
1543         case STATE_AFTER_CLOSE_TAG_SLASH:
1544           /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1545           if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1546             {
1547               context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1548
1549               /* start of tag name */
1550               context->start = context->iter;
1551             }
1552           else
1553             {
1554               gchar buf[8];
1555
1556               set_error (context,
1557                          error,
1558                          G_MARKUP_ERROR_PARSE,
1559                          _("'%s' is not a valid character following "
1560                            "the characters '</'; '%s' may not begin an "
1561                            "element name"),
1562                          utf8_str (context->iter, buf),
1563                          utf8_str (context->iter, buf));
1564             }
1565           break;
1566
1567         case STATE_INSIDE_CLOSE_TAG_NAME:
1568           /* Possible next state: AFTER_CLOSE_TAG_NAME */
1569           advance_to_name_end (context);
1570           add_to_partial (context, context->start, context->iter);
1571
1572           if (context->iter != context->current_text_end)
1573             context->state = STATE_AFTER_CLOSE_TAG_NAME;
1574           break;
1575
1576         case STATE_AFTER_CLOSE_TAG_NAME:
1577           /* Possible next state: AFTER_CLOSE_TAG_SLASH */
1578
1579           skip_spaces (context);
1580
1581           if (context->iter != context->current_text_end)
1582             {
1583               GString *close_name;
1584
1585               close_name = context->partial_chunk;
1586               context->partial_chunk = NULL;
1587
1588               if (*context->iter != '>')
1589                 {
1590                   gchar buf[8];
1591
1592                   set_error (context,
1593                              error,
1594                              G_MARKUP_ERROR_PARSE,
1595                              _("'%s' is not a valid character following "
1596                                "the close element name '%s'; the allowed "
1597                                "character is '>'"),
1598                              utf8_str (context->iter, buf),
1599                              close_name->str);
1600                 }
1601               else if (context->tag_stack == NULL)
1602                 {
1603                   set_error (context,
1604                              error,
1605                              G_MARKUP_ERROR_PARSE,
1606                              _("Element '%s' was closed, no element "
1607                                "is currently open"),
1608                              close_name->str);
1609                 }
1610               else if (strcmp (close_name->str, current_element (context)) != 0)
1611                 {
1612                   set_error (context,
1613                              error,
1614                              G_MARKUP_ERROR_PARSE,
1615                              _("Element '%s' was closed, but the currently "
1616                                "open element is '%s'"),
1617                              close_name->str,
1618                              current_element (context));
1619                 }
1620               else
1621                 {
1622                   advance_char (context);
1623                   context->state = STATE_AFTER_CLOSE_ANGLE;
1624                   context->start = NULL;
1625
1626                   emit_end_element (context, error);
1627                 }
1628               context->partial_chunk = close_name;
1629               truncate_partial (context);
1630             }
1631           break;
1632
1633         case STATE_INSIDE_PASSTHROUGH:
1634           /* Possible next state: AFTER_CLOSE_ANGLE */
1635           do
1636             {
1637               if (*context->iter == '<')
1638                 context->balance++;
1639               if (*context->iter == '>')
1640                 {
1641                   gchar *str;
1642                   gsize len;
1643
1644                   context->balance--;
1645                   add_to_partial (context, context->start, context->iter);
1646                   context->start = context->iter;
1647
1648                   str = context->partial_chunk->str;
1649                   len = context->partial_chunk->len;
1650
1651                   if (str[1] == '?' && str[len - 1] == '?')
1652                     break;
1653                   if (strncmp (str, "<!--", 4) == 0 &&
1654                       strcmp (str + len - 2, "--") == 0)
1655                     break;
1656                   if (strncmp (str, "<![CDATA[", 9) == 0 &&
1657                       strcmp (str + len - 2, "]]") == 0)
1658                     break;
1659                   if (strncmp (str, "<!DOCTYPE", 9) == 0 &&
1660                       context->balance == 0)
1661                     break;
1662                 }
1663             }
1664           while (advance_char (context));
1665
1666           if (context->iter == context->current_text_end)
1667             {
1668               /* The passthrough hasn't necessarily ended. Merge with
1669                * partial chunk, leave state unchanged.
1670                */
1671                add_to_partial (context, context->start, context->iter);
1672             }
1673           else
1674             {
1675               /* The passthrough has ended at the close angle. Combine
1676                * it with the partial chunk if any. Call the passthrough
1677                * callback. Note that the open/close angles are
1678                * included in the text of the passthrough.
1679                */
1680               GError *tmp_error = NULL;
1681
1682               advance_char (context); /* advance past close angle */
1683               add_to_partial (context, context->start, context->iter);
1684
1685               if (context->flags & G_MARKUP_TREAT_CDATA_AS_TEXT &&
1686                   strncmp (context->partial_chunk->str, "<![CDATA[", 9) == 0)
1687                 {
1688                   if (context->parser->text &&
1689                       text_validate (context,
1690                                      context->partial_chunk->str + 9,
1691                                      context->partial_chunk->len - 12,
1692                                      error))
1693                     (*context->parser->text) (context,
1694                                               context->partial_chunk->str + 9,
1695                                               context->partial_chunk->len - 12,
1696                                               context->user_data,
1697                                               &tmp_error);
1698                 }
1699               else if (context->parser->passthrough &&
1700                        text_validate (context,
1701                                       context->partial_chunk->str,
1702                                       context->partial_chunk->len,
1703                                       error))
1704                 (*context->parser->passthrough) (context,
1705                                                  context->partial_chunk->str,
1706                                                  context->partial_chunk->len,
1707                                                  context->user_data,
1708                                                  &tmp_error);
1709
1710               truncate_partial (context);
1711
1712               if (tmp_error == NULL)
1713                 {
1714                   context->state = STATE_AFTER_CLOSE_ANGLE;
1715                   context->start = context->iter; /* could begin text */
1716                 }
1717               else
1718                 propagate_error (context, error, tmp_error);
1719             }
1720           break;
1721
1722         case STATE_ERROR:
1723           goto finished;
1724           break;
1725
1726         default:
1727           g_assert_not_reached ();
1728           break;
1729         }
1730     }
1731
1732  finished:
1733   context->parsing = FALSE;
1734
1735   return context->state != STATE_ERROR;
1736 }
1737
1738 /**
1739  * g_markup_parse_context_end_parse:
1740  * @context: a #GMarkupParseContext
1741  * @error: return location for a #GError
1742  *
1743  * Signals to the #GMarkupParseContext that all data has been
1744  * fed into the parse context with g_markup_parse_context_parse().
1745  *
1746  * This function reports an error if the document isn't complete,
1747  * for example if elements are still open.
1748  *
1749  * Returns: %TRUE on success, %FALSE if an error was set
1750  */
1751 gboolean
1752 g_markup_parse_context_end_parse (GMarkupParseContext  *context,
1753                                   GError              **error)
1754 {
1755   g_return_val_if_fail (context != NULL, FALSE);
1756   g_return_val_if_fail (!context->parsing, FALSE);
1757   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1758
1759   if (context->partial_chunk != NULL)
1760     {
1761       g_string_free (context->partial_chunk, TRUE);
1762       context->partial_chunk = NULL;
1763     }
1764
1765   if (context->document_empty)
1766     {
1767       set_error_literal (context, error, G_MARKUP_ERROR_EMPTY,
1768                          _("Document was empty or contained only whitespace"));
1769       return FALSE;
1770     }
1771
1772   context->parsing = TRUE;
1773
1774   switch (context->state)
1775     {
1776     case STATE_START:
1777       /* Nothing to do */
1778       break;
1779
1780     case STATE_AFTER_OPEN_ANGLE:
1781       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1782                          _("Document ended unexpectedly just after an open angle bracket '<'"));
1783       break;
1784
1785     case STATE_AFTER_CLOSE_ANGLE:
1786       if (context->tag_stack != NULL)
1787         {
1788           /* Error message the same as for INSIDE_TEXT */
1789           set_error (context, error, G_MARKUP_ERROR_PARSE,
1790                      _("Document ended unexpectedly with elements still open - "
1791                        "'%s' was the last element opened"),
1792                      current_element (context));
1793         }
1794       break;
1795
1796     case STATE_AFTER_ELISION_SLASH:
1797       set_error (context, error, G_MARKUP_ERROR_PARSE,
1798                  _("Document ended unexpectedly, expected to see a close angle "
1799                    "bracket ending the tag <%s/>"), current_element (context));
1800       break;
1801
1802     case STATE_INSIDE_OPEN_TAG_NAME:
1803       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1804                          _("Document ended unexpectedly inside an element name"));
1805       break;
1806
1807     case STATE_INSIDE_ATTRIBUTE_NAME:
1808     case STATE_AFTER_ATTRIBUTE_NAME:
1809       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1810                          _("Document ended unexpectedly inside an attribute name"));
1811       break;
1812
1813     case STATE_BETWEEN_ATTRIBUTES:
1814       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1815                          _("Document ended unexpectedly inside an element-opening "
1816                            "tag."));
1817       break;
1818
1819     case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1820       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1821                          _("Document ended unexpectedly after the equals sign "
1822                            "following an attribute name; no attribute value"));
1823       break;
1824
1825     case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1826     case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1827       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1828                          _("Document ended unexpectedly while inside an attribute "
1829                            "value"));
1830       break;
1831
1832     case STATE_INSIDE_TEXT:
1833       g_assert (context->tag_stack != NULL);
1834       set_error (context, error, G_MARKUP_ERROR_PARSE,
1835                  _("Document ended unexpectedly with elements still open - "
1836                    "'%s' was the last element opened"),
1837                  current_element (context));
1838       break;
1839
1840     case STATE_AFTER_CLOSE_TAG_SLASH:
1841     case STATE_INSIDE_CLOSE_TAG_NAME:
1842     case STATE_AFTER_CLOSE_TAG_NAME:
1843       set_error (context, error, G_MARKUP_ERROR_PARSE,
1844                  _("Document ended unexpectedly inside the close tag for "
1845                    "element '%s'"), current_element (context));
1846       break;
1847
1848     case STATE_INSIDE_PASSTHROUGH:
1849       set_error_literal (context, error, G_MARKUP_ERROR_PARSE,
1850                          _("Document ended unexpectedly inside a comment or "
1851                            "processing instruction"));
1852       break;
1853
1854     case STATE_ERROR:
1855     default:
1856       g_assert_not_reached ();
1857       break;
1858     }
1859
1860   context->parsing = FALSE;
1861
1862   return context->state != STATE_ERROR;
1863 }
1864
1865 /**
1866  * g_markup_parse_context_get_element:
1867  * @context: a #GMarkupParseContext
1868  *
1869  * Retrieves the name of the currently open element.
1870  *
1871  * If called from the start_element or end_element handlers this will
1872  * give the element_name as passed to those functions. For the parent
1873  * elements, see g_markup_parse_context_get_element_stack().
1874  *
1875  * Returns: the name of the currently open element, or %NULL
1876  *
1877  * Since: 2.2
1878  */
1879 const gchar *
1880 g_markup_parse_context_get_element (GMarkupParseContext *context)
1881 {
1882   g_return_val_if_fail (context != NULL, NULL);
1883
1884   if (context->tag_stack == NULL)
1885     return NULL;
1886   else
1887     return current_element (context);
1888 }
1889
1890 /**
1891  * g_markup_parse_context_get_element_stack:
1892  * @context: a #GMarkupParseContext
1893  *
1894  * Retrieves the element stack from the internal state of the parser.
1895  *
1896  * The returned #GSList is a list of strings where the first item is
1897  * the currently open tag (as would be returned by
1898  * g_markup_parse_context_get_element()) and the next item is its
1899  * immediate parent.
1900  *
1901  * This function is intended to be used in the start_element and
1902  * end_element handlers where g_markup_parse_context_get_element()
1903  * would merely return the name of the element that is being
1904  * processed.
1905  *
1906  * Returns: the element stack, which must not be modified
1907  *
1908  * Since: 2.16
1909  */
1910 const GSList *
1911 g_markup_parse_context_get_element_stack (GMarkupParseContext *context)
1912 {
1913   g_return_val_if_fail (context != NULL, NULL);
1914   return context->tag_stack;
1915 }
1916
1917 /**
1918  * g_markup_parse_context_get_position:
1919  * @context: a #GMarkupParseContext
1920  * @line_number: (allow-none): return location for a line number, or %NULL
1921  * @char_number: (allow-none): return location for a char-on-line number, or %NULL
1922  *
1923  * Retrieves the current line number and the number of the character on
1924  * that line. Intended for use in error messages; there are no strict
1925  * semantics for what constitutes the "current" line number other than
1926  * "the best number we could come up with for error messages."
1927  */
1928 void
1929 g_markup_parse_context_get_position (GMarkupParseContext *context,
1930                                      gint                *line_number,
1931                                      gint                *char_number)
1932 {
1933   g_return_if_fail (context != NULL);
1934
1935   if (line_number)
1936     *line_number = context->line_number;
1937
1938   if (char_number)
1939     *char_number = context->char_number;
1940 }
1941
1942 /**
1943  * g_markup_parse_context_get_user_data:
1944  * @context: a #GMarkupParseContext
1945  *
1946  * Returns the user_data associated with @context.
1947  *
1948  * This will either be the user_data that was provided to
1949  * g_markup_parse_context_new() or to the most recent call
1950  * of g_markup_parse_context_push().
1951  *
1952  * Returns: the provided user_data. The returned data belongs to
1953  *     the markup context and will be freed when
1954  *     g_markup_parse_context_free() is called.
1955  *
1956  * Since: 2.18
1957  */
1958 gpointer
1959 g_markup_parse_context_get_user_data (GMarkupParseContext *context)
1960 {
1961   return context->user_data;
1962 }
1963
1964 /**
1965  * g_markup_parse_context_push:
1966  * @context: a #GMarkupParseContext
1967  * @parser: a #GMarkupParser
1968  * @user_data: user data to pass to #GMarkupParser functions
1969  *
1970  * Temporarily redirects markup data to a sub-parser.
1971  *
1972  * This function may only be called from the start_element handler of
1973  * a #GMarkupParser. It must be matched with a corresponding call to
1974  * g_markup_parse_context_pop() in the matching end_element handler
1975  * (except in the case that the parser aborts due to an error).
1976  *
1977  * All tags, text and other data between the matching tags is
1978  * redirected to the subparser given by @parser. @user_data is used
1979  * as the user_data for that parser. @user_data is also passed to the
1980  * error callback in the event that an error occurs. This includes
1981  * errors that occur in subparsers of the subparser.
1982  *
1983  * The end tag matching the start tag for which this call was made is
1984  * handled by the previous parser (which is given its own user_data)
1985  * which is why g_markup_parse_context_pop() is provided to allow "one
1986  * last access" to the @user_data provided to this function. In the
1987  * case of error, the @user_data provided here is passed directly to
1988  * the error callback of the subparser and g_markup_parse_context_pop()
1989  * should not be called. In either case, if @user_data was allocated
1990  * then it ought to be freed from both of these locations.
1991  *
1992  * This function is not intended to be directly called by users
1993  * interested in invoking subparsers. Instead, it is intended to be
1994  * used by the subparsers themselves to implement a higher-level
1995  * interface.
1996  *
1997  * As an example, see the following implementation of a simple
1998  * parser that counts the number of tags encountered.
1999  *
2000  * |[<!-- language="C" -->
2001  * typedef struct
2002  * {
2003  *   gint tag_count;
2004  * } CounterData;
2005  *
2006  * static void
2007  * counter_start_element (GMarkupParseContext  *context,
2008  *                        const gchar          *element_name,
2009  *                        const gchar         **attribute_names,
2010  *                        const gchar         **attribute_values,
2011  *                        gpointer              user_data,
2012  *                        GError              **error)
2013  * {
2014  *   CounterData *data = user_data;
2015  *
2016  *   data->tag_count++;
2017  * }
2018  *
2019  * static void
2020  * counter_error (GMarkupParseContext *context,
2021  *                GError              *error,
2022  *                gpointer             user_data)
2023  * {
2024  *   CounterData *data = user_data;
2025  *
2026  *   g_slice_free (CounterData, data);
2027  * }
2028  *
2029  * static GMarkupParser counter_subparser =
2030  * {
2031  *   counter_start_element,
2032  *   NULL,
2033  *   NULL,
2034  *   NULL,
2035  *   counter_error
2036  * };
2037  * ]|
2038  *
2039  * In order to allow this parser to be easily used as a subparser, the
2040  * following interface is provided:
2041  *
2042  * |[<!-- language="C" -->
2043  * void
2044  * start_counting (GMarkupParseContext *context)
2045  * {
2046  *   CounterData *data = g_slice_new (CounterData);
2047  *
2048  *   data->tag_count = 0;
2049  *   g_markup_parse_context_push (context, &counter_subparser, data);
2050  * }
2051  *
2052  * gint
2053  * end_counting (GMarkupParseContext *context)
2054  * {
2055  *   CounterData *data = g_markup_parse_context_pop (context);
2056  *   int result;
2057  *
2058  *   result = data->tag_count;
2059  *   g_slice_free (CounterData, data);
2060  *
2061  *   return result;
2062  * }
2063  * ]|
2064  *
2065  * The subparser would then be used as follows:
2066  *
2067  * |[<!-- language="C" -->
2068  * static void start_element (context, element_name, ...)
2069  * {
2070  *   if (strcmp (element_name, "count-these") == 0)
2071  *     start_counting (context);
2072  *
2073  *   // else, handle other tags...
2074  * }
2075  *
2076  * static void end_element (context, element_name, ...)
2077  * {
2078  *   if (strcmp (element_name, "count-these") == 0)
2079  *     g_print ("Counted %d tags\n", end_counting (context));
2080  *
2081  *   // else, handle other tags...
2082  * }
2083  * ]|
2084  *
2085  * Since: 2.18
2086  **/
2087 void
2088 g_markup_parse_context_push (GMarkupParseContext *context,
2089                              const GMarkupParser *parser,
2090                              gpointer             user_data)
2091 {
2092   GMarkupRecursionTracker *tracker;
2093
2094   tracker = g_slice_new (GMarkupRecursionTracker);
2095   tracker->prev_element = context->subparser_element;
2096   tracker->prev_parser = context->parser;
2097   tracker->prev_user_data = context->user_data;
2098
2099   context->subparser_element = current_element (context);
2100   context->parser = parser;
2101   context->user_data = user_data;
2102
2103   context->subparser_stack = g_slist_prepend (context->subparser_stack,
2104                                               tracker);
2105 }
2106
2107 /**
2108  * g_markup_parse_context_pop:
2109  * @context: a #GMarkupParseContext
2110  *
2111  * Completes the process of a temporary sub-parser redirection.
2112  *
2113  * This function exists to collect the user_data allocated by a
2114  * matching call to g_markup_parse_context_push(). It must be called
2115  * in the end_element handler corresponding to the start_element
2116  * handler during which g_markup_parse_context_push() was called.
2117  * You must not call this function from the error callback -- the
2118  * @user_data is provided directly to the callback in that case.
2119  *
2120  * This function is not intended to be directly called by users
2121  * interested in invoking subparsers. Instead, it is intended to
2122  * be used by the subparsers themselves to implement a higher-level
2123  * interface.
2124  *
2125  * Returns: the user data passed to g_markup_parse_context_push()
2126  *
2127  * Since: 2.18
2128  */
2129 gpointer
2130 g_markup_parse_context_pop (GMarkupParseContext *context)
2131 {
2132   gpointer user_data;
2133
2134   if (!context->awaiting_pop)
2135     possibly_finish_subparser (context);
2136
2137   g_assert (context->awaiting_pop);
2138
2139   context->awaiting_pop = FALSE;
2140
2141   /* valgrind friendliness */
2142   user_data = context->held_user_data;
2143   context->held_user_data = NULL;
2144
2145   return user_data;
2146 }
2147
2148 static void
2149 append_escaped_text (GString     *str,
2150                      const gchar *text,
2151                      gssize       length)
2152 {
2153   const gchar *p;
2154   const gchar *end;
2155   gunichar c;
2156
2157   p = text;
2158   end = text + length;
2159
2160   while (p != end)
2161     {
2162       const gchar *next;
2163       next = g_utf8_next_char (p);
2164
2165       switch (*p)
2166         {
2167         case '&':
2168           g_string_append (str, "&amp;");
2169           break;
2170
2171         case '<':
2172           g_string_append (str, "&lt;");
2173           break;
2174
2175         case '>':
2176           g_string_append (str, "&gt;");
2177           break;
2178
2179         case '\'':
2180           g_string_append (str, "&apos;");
2181           break;
2182
2183         case '"':
2184           g_string_append (str, "&quot;");
2185           break;
2186
2187         default:
2188           c = g_utf8_get_char (p);
2189           if ((0x1 <= c && c <= 0x8) ||
2190               (0xb <= c && c  <= 0xc) ||
2191               (0xe <= c && c <= 0x1f) ||
2192               (0x7f <= c && c <= 0x84) ||
2193               (0x86 <= c && c <= 0x9f))
2194             g_string_append_printf (str, "&#x%x;", c);
2195           else
2196             g_string_append_len (str, p, next - p);
2197           break;
2198         }
2199
2200       p = next;
2201     }
2202 }
2203
2204 /**
2205  * g_markup_escape_text:
2206  * @text: some valid UTF-8 text
2207  * @length: length of @text in bytes, or -1 if the text is nul-terminated
2208  *
2209  * Escapes text so that the markup parser will parse it verbatim.
2210  * Less than, greater than, ampersand, etc. are replaced with the
2211  * corresponding entities. This function would typically be used
2212  * when writing out a file to be parsed with the markup parser.
2213  *
2214  * Note that this function doesn't protect whitespace and line endings
2215  * from being processed according to the XML rules for normalization
2216  * of line endings and attribute values.
2217  *
2218  * Note also that this function will produce character references in
2219  * the range of &#x1; ... &#x1f; for all control sequences
2220  * except for tabstop, newline and carriage return.  The character
2221  * references in this range are not valid XML 1.0, but they are
2222  * valid XML 1.1 and will be accepted by the GMarkup parser.
2223  *
2224  * Returns: a newly allocated string with the escaped text
2225  */
2226 gchar*
2227 g_markup_escape_text (const gchar *text,
2228                       gssize       length)
2229 {
2230   GString *str;
2231
2232   g_return_val_if_fail (text != NULL, NULL);
2233
2234   if (length < 0)
2235     length = strlen (text);
2236
2237   /* prealloc at least as long as original text */
2238   str = g_string_sized_new (length);
2239   append_escaped_text (str, text, length);
2240
2241   return g_string_free (str, FALSE);
2242 }
2243
/*
 * find_conversion_skip_digits:
 * @cp: position inside a format string
 *
 * Returns: the position of the first character at or after @cp
 *  that is not a decimal digit.
 */
static const char *
find_conversion_skip_digits (const char *cp)
{
  while (*cp >= '0' && *cp <= '9')
    cp++;

  return cp;
}

/*
 * find_conversion_skip_positional:
 * @cp: position inside a format string
 *
 * Returns: the position just past a "<digits>$" positional-argument
 *  marker starting at @cp, or @cp itself if there is no such marker.
 */
static const char *
find_conversion_skip_positional (const char *cp)
{
  const char *np = find_conversion_skip_digits (cp);

  /* Digits only count as a positional marker when '$' follows them */
  if (np != cp && *np == '$')
    return np + 1;

  return cp;
}

/*
 * find_conversion:
 * @format: a printf-style format string
 * @after: location to store a pointer to the character after
 *     the returned conversion. On a %NULL return, returns the
 *     pointer to the trailing NUL in the string
 *
 * Find the next conversion in a printf-style format string.
 * Partially based on code from printf-parser.c,
 * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
 *
 * A conversion that is cut short by the end of the string (a lone
 * trailing '%', or e.g. "%5" or "%l" with no conversion character)
 * is not reported as a conversion; @after then points at the
 * trailing NUL, never beyond it.
 *
 * Returns: pointer to the next conversion in @format,
 *  or %NULL, if none.
 */
static const char *
find_conversion (const char  *format,
                 const char **after)
{
  const char *start = format;
  const char *cp;

  while (*start != '\0' && *start != '%')
    start++;

  if (*start == '\0')
    {
      *after = start;
      return NULL;
    }

  cp = start + 1;

  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Test for positional argument.  */
  cp = find_conversion_skip_positional (cp);

  /* Skip the flags.  */
  while (*cp == '\'' ||
         *cp == '-' ||
         *cp == '+' ||
         *cp == ' ' ||
         *cp == '#' ||
         *cp == '0')
    cp++;

  /* Skip the field width: either '*' (optionally followed by a
   * positional marker) or a run of digits.
   */
  if (*cp == '*')
    cp = find_conversion_skip_positional (cp + 1);
  else
    cp = find_conversion_skip_digits (cp);

  /* Skip the precision, which has the same shape as the width.  The
   * '*' must be stepped over here too, otherwise "%.*s" is cut off
   * after the '*' and the 's' is mistaken for literal text.
   */
  if (*cp == '.')
    {
      cp++;

      if (*cp == '*')
        cp = find_conversion_skip_positional (cp + 1);
      else
        cp = find_conversion_skip_digits (cp);
    }

  /* Skip argument type/size specifiers.  */
  while (*cp == 'h' ||
         *cp == 'L' ||
         *cp == 'l' ||
         *cp == 'j' ||
         *cp == 'z' ||
         *cp == 'Z' ||
         *cp == 't')
    cp++;

  /* The string ended before the conversion character: treat this like
   * a lone trailing '%' so that @after never moves past the NUL.
   */
  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Skip the conversion character.  */
  cp++;

  *after = cp;
  return start;
}
2369
2370 /**
2371  * g_markup_vprintf_escaped:
2372  * @format: printf() style format string
2373  * @args: variable argument list, similar to vprintf()
2374  *
2375  * Formats the data in @args according to @format, escaping
2376  * all string and character arguments in the fashion
2377  * of g_markup_escape_text(). See g_markup_printf_escaped().
2378  *
2379  * Returns: newly allocated result from formatting
2380  *  operation. Free with g_free().
2381  *
2382  * Since: 2.4
2383  */
2384 #pragma GCC diagnostic push
2385 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
2386
2387 gchar *
2388 g_markup_vprintf_escaped (const gchar *format,
2389                           va_list      args)
2390 {
2391   GString *format1;
2392   GString *format2;
2393   GString *result = NULL;
2394   gchar *output1 = NULL;
2395   gchar *output2 = NULL;
2396   const char *p, *op1, *op2;
2397   va_list args2;
2398
2399   /* The technique here, is that we make two format strings that
2400    * have the identical conversions in the identical order to the
2401    * original strings, but differ in the text in-between. We
2402    * then use the normal g_strdup_vprintf() to format the arguments
2403    * with the two new format strings. By comparing the results,
2404    * we can figure out what segments of the output come from
2405    * the original format string, and what from the arguments,
2406    * and thus know what portions of the string to escape.
2407    *
2408    * For instance, for:
2409    *
2410    *  g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5);
2411    *
2412    * We form the two format strings "%sX%dX" and %sY%sY". The results
2413    * of formatting with those two strings are
2414    *
2415    * "%sX%dX" => "Susan & FredX5X"
2416    * "%sY%dY" => "Susan & FredY5Y"
2417    *
2418    * To find the span of the first argument, we find the first position
2419    * where the two arguments differ, which tells us that the first
2420    * argument formatted to "Susan & Fred". We then escape that
2421    * to "Susan & Fred" and join up with the intermediate portions
2422    * of the format string and the second argument to get
2423    * "Susan & Fred ate 5 apples".
2424    */
2425
2426   /* Create the two modified format strings
2427    */
2428   format1 = g_string_new (NULL);
2429   format2 = g_string_new (NULL);
2430   p = format;
2431   while (TRUE)
2432     {
2433       const char *after;
2434       const char *conv = find_conversion (p, &after);
2435       if (!conv)
2436         break;
2437
2438       g_string_append_len (format1, conv, after - conv);
2439       g_string_append_c (format1, 'X');
2440       g_string_append_len (format2, conv, after - conv);
2441       g_string_append_c (format2, 'Y');
2442
2443       p = after;
2444     }
2445
2446   /* Use them to format the arguments
2447    */
2448   G_VA_COPY (args2, args);
2449
2450   output1 = g_strdup_vprintf (format1->str, args);
2451
2452   if (!output1)
2453     {
2454       va_end (args2);
2455       goto cleanup;
2456     }
2457
2458   output2 = g_strdup_vprintf (format2->str, args2);
2459   va_end (args2);
2460   if (!output2)
2461     goto cleanup;
2462   result = g_string_new (NULL);
2463
2464   /* Iterate through the original format string again,
2465    * copying the non-conversion portions and the escaped
2466    * converted arguments to the output string.
2467    */
2468   op1 = output1;
2469   op2 = output2;
2470   p = format;
2471   while (TRUE)
2472     {
2473       const char *after;
2474       const char *output_start;
2475       const char *conv = find_conversion (p, &after);
2476       char *escaped;
2477
2478       if (!conv)        /* The end, after points to the trailing \0 */
2479         {
2480           g_string_append_len (result, p, after - p);
2481           break;
2482         }
2483
2484       g_string_append_len (result, p, conv - p);
2485       output_start = op1;
2486       while (*op1 == *op2)
2487         {
2488           op1++;
2489           op2++;
2490         }
2491
2492       escaped = g_markup_escape_text (output_start, op1 - output_start);
2493       g_string_append (result, escaped);
2494       g_free (escaped);
2495
2496       p = after;
2497       op1++;
2498       op2++;
2499     }
2500
2501  cleanup:
2502   g_string_free (format1, TRUE);
2503   g_string_free (format2, TRUE);
2504   g_free (output1);
2505   g_free (output2);
2506
2507   if (result)
2508     return g_string_free (result, FALSE);
2509   else
2510     return NULL;
2511 }
2512
2513 #pragma GCC diagnostic pop
2514
2515 /**
2516  * g_markup_printf_escaped:
2517  * @format: printf() style format string
2518  * @...: the arguments to insert in the format string
2519  *
2520  * Formats arguments according to @format, escaping
2521  * all string and character arguments in the fashion
2522  * of g_markup_escape_text(). This is useful when you
2523  * want to insert literal strings into XML-style markup
2524  * output, without having to worry that the strings
2525  * might themselves contain markup.
2526  *
2527  * |[<!-- language="C" -->
2528  * const char *store = "Fortnum & Mason";
2529  * const char *item = "Tea";
2530  * char *output;
2531  *
2532  * output = g_markup_printf_escaped ("<purchase>"
2533  *                                   "<store>%s</store>"
2534  *                                   "<item>%s</item>"
2535  *                                   "</purchase>",
2536  *                                   store, item);
2537  * ]|
2538  *
2539  * Returns: newly allocated result from formatting
2540  *    operation. Free with g_free().
2541  *
2542  * Since: 2.4
2543  */
2544 gchar *
2545 g_markup_printf_escaped (const gchar *format, ...)
2546 {
2547   char *result;
2548   va_list args;
2549
2550   va_start (args, format);
2551   result = g_markup_vprintf_escaped (format, args);
2552   va_end (args);
2553
2554   return result;
2555 }
2556
2557 static gboolean
2558 g_markup_parse_boolean (const char  *string,
2559                         gboolean    *value)
2560 {
2561   char const * const falses[] = { "false", "f", "no", "n", "0" };
2562   char const * const trues[] = { "true", "t", "yes", "y", "1" };
2563   int i;
2564
2565   for (i = 0; i < G_N_ELEMENTS (falses); i++)
2566     {
2567       if (g_ascii_strcasecmp (string, falses[i]) == 0)
2568         {
2569           if (value != NULL)
2570             *value = FALSE;
2571
2572           return TRUE;
2573         }
2574     }
2575
2576   for (i = 0; i < G_N_ELEMENTS (trues); i++)
2577     {
2578       if (g_ascii_strcasecmp (string, trues[i]) == 0)
2579         {
2580           if (value != NULL)
2581             *value = TRUE;
2582
2583           return TRUE;
2584         }
2585     }
2586
2587   return FALSE;
2588 }
2589
2590 /**
2591  * GMarkupCollectType:
2592  * @G_MARKUP_COLLECT_INVALID: used to terminate the list of attributes
2593  *     to collect
2594  * @G_MARKUP_COLLECT_STRING: collect the string pointer directly from
2595  *     the attribute_values[] array. Expects a parameter of type (const
2596  *     char **). If %G_MARKUP_COLLECT_OPTIONAL is specified and the
2597  *     attribute isn't present then the pointer will be set to %NULL
2598  * @G_MARKUP_COLLECT_STRDUP: as with %G_MARKUP_COLLECT_STRING, but
2599  *     expects a parameter of type (char **) and g_strdup()s the
2600  *     returned pointer. The pointer must be freed with g_free()
2601  * @G_MARKUP_COLLECT_BOOLEAN: expects a parameter of type (gboolean *)
2602  *     and parses the attribute value as a boolean. Sets %FALSE if the
2603  *     attribute isn't present. Valid boolean values consist of
2604  *     (case-insensitive) "false", "f", "no", "n", "0" and "true", "t",
2605  *     "yes", "y", "1"
2606  * @G_MARKUP_COLLECT_TRISTATE: as with %G_MARKUP_COLLECT_BOOLEAN, but
2607  *     in the case of a missing attribute a value is set that compares
2608  *     equal to neither %FALSE nor %TRUE. %G_MARKUP_COLLECT_OPTIONAL is
2609  *     implied
2610  * @G_MARKUP_COLLECT_OPTIONAL: can be bitwise ORed with the other fields.
2611  *     If present, allows the attribute not to appear. A default value
2612  *     is set depending on what value type is used
2613  *
2614  * A mixed enumerated type and flags field. You must specify one type
2615  * (string, strdup, boolean, tristate).  Additionally, you may optionally
2616  * bitwise OR the type with the flag %G_MARKUP_COLLECT_OPTIONAL.
2617  *
2618  * It is likely that this enum will be extended in the future to
2619  * support other types.
2620  */
2621
2622 /**
2623  * g_markup_collect_attributes:
2624  * @element_name: the current tag name
2625  * @attribute_names: the attribute names
2626  * @attribute_values: the attribute values
2627  * @error: a pointer to a #GError or %NULL
2628  * @first_type: the #GMarkupCollectType of the first attribute
2629  * @first_attr: the name of the first attribute
2630  * @...: a pointer to the storage location of the first attribute
2631  *     (or %NULL), followed by more types, names and pointers, ending
2632  *     with %G_MARKUP_COLLECT_INVALID
2633  *
2634  * Collects the attributes of the element from the data passed to the
2635  * #GMarkupParser start_element function, dealing with common error
2636  * conditions and supporting boolean values.
2637  *
2638  * This utility function is not required to write a parser but can save
2639  * a lot of typing.
2640  *
2641  * The @element_name, @attribute_names, @attribute_values and @error
2642  * parameters passed to the start_element callback should be passed
2643  * unmodified to this function.
2644  *
2645  * Following these arguments is a list of "supported" attributes to collect.
2646  * It is an error to specify multiple attributes with the same name. If any
2647  * attribute not in the list appears in the @attribute_names array then an
2648  * unknown attribute error will result.
2649  *
2650  * The #GMarkupCollectType field allows specifying the type of collection
2651  * to perform and if a given attribute must appear or is optional.
2652  *
2653  * The attribute name is simply the name of the attribute to collect.
2654  *
2655  * The pointer should be of the appropriate type (see the descriptions
2656  * under #GMarkupCollectType) and may be %NULL in case a particular
2657  * attribute is to be allowed but ignored.
2658  *
2659  * This function deals with issuing errors for missing attributes
2660  * (of type %G_MARKUP_ERROR_MISSING_ATTRIBUTE), unknown attributes
2661  * (of type %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE) and duplicate
2662  * attributes (of type %G_MARKUP_ERROR_INVALID_CONTENT) as well
2663  * as parse errors for boolean-valued attributes (again of type
2664  * %G_MARKUP_ERROR_INVALID_CONTENT). In all of these cases %FALSE
2665  * will be returned and @error will be set as appropriate.
2666  *
2667  * Returns: %TRUE if successful
2668  *
2669  * Since: 2.16
2670  **/
2671 gboolean
2672 g_markup_collect_attributes (const gchar         *element_name,
2673                              const gchar        **attribute_names,
2674                              const gchar        **attribute_values,
2675                              GError             **error,
2676                              GMarkupCollectType   first_type,
2677                              const gchar         *first_attr,
2678                              ...)
2679 {
2680   GMarkupCollectType type;
2681   const gchar *attr;
2682   guint64 collected;
2683   int written;
2684   va_list ap;
2685   int i;
2686
2687   type = first_type;
2688   attr = first_attr;
2689   collected = 0;
2690   written = 0;
2691
2692   va_start (ap, first_attr);
2693   while (type != G_MARKUP_COLLECT_INVALID)
2694     {
2695       gboolean mandatory;
2696       const gchar *value;
2697
2698       mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL);
2699       type &= (G_MARKUP_COLLECT_OPTIONAL - 1);
2700
2701       /* tristate records a value != TRUE and != FALSE
2702        * for the case where the attribute is missing
2703        */
2704       if (type == G_MARKUP_COLLECT_TRISTATE)
2705         mandatory = FALSE;
2706
2707       for (i = 0; attribute_names[i]; i++)
2708         if (i >= 40 || !(collected & (G_GUINT64_CONSTANT(1) << i)))
2709           if (!strcmp (attribute_names[i], attr))
2710             break;
2711
2712       /* ISO C99 only promises that the user can pass up to 127 arguments.
2713        * Subtracting the first 4 arguments plus the final NULL and dividing
2714        * by 3 arguments per collected attribute, we are left with a maximum
2715        * number of supported attributes of (127 - 5) / 3 = 40.
2716        *
2717        * In reality, nobody is ever going to call us with anywhere close to
2718        * 40 attributes to collect, so it is safe to assume that if i > 40
2719        * then the user has given some invalid or repeated arguments.  These
2720        * problems will be caught and reported at the end of the function.
2721        *
2722        * We know at this point that we have an error, but we don't know
2723        * what error it is, so just continue...
2724        */
2725       if (i < 40)
2726         collected |= (G_GUINT64_CONSTANT(1) << i);
2727
2728       value = attribute_values[i];
2729
2730       if (value == NULL && mandatory)
2731         {
2732           g_set_error (error, G_MARKUP_ERROR,
2733                        G_MARKUP_ERROR_MISSING_ATTRIBUTE,
2734                        "element '%s' requires attribute '%s'",
2735                        element_name, attr);
2736
2737           va_end (ap);
2738           goto failure;
2739         }
2740
2741       switch (type)
2742         {
2743         case G_MARKUP_COLLECT_STRING:
2744           {
2745             const char **str_ptr;
2746
2747             str_ptr = va_arg (ap, const char **);
2748
2749             if (str_ptr != NULL)
2750               *str_ptr = value;
2751           }
2752           break;
2753
2754         case G_MARKUP_COLLECT_STRDUP:
2755           {
2756             char **str_ptr;
2757
2758             str_ptr = va_arg (ap, char **);
2759
2760             if (str_ptr != NULL)
2761               *str_ptr = g_strdup (value);
2762           }
2763           break;
2764
2765         case G_MARKUP_COLLECT_BOOLEAN:
2766         case G_MARKUP_COLLECT_TRISTATE:
2767           if (value == NULL)
2768             {
2769               gboolean *bool_ptr;
2770
2771               bool_ptr = va_arg (ap, gboolean *);
2772
2773               if (bool_ptr != NULL)
2774                 {
2775                   if (type == G_MARKUP_COLLECT_TRISTATE)
2776                     /* constructivists rejoice!
2777                      * neither false nor true...
2778                      */
2779                     *bool_ptr = -1;
2780
2781                   else /* G_MARKUP_COLLECT_BOOLEAN */
2782                     *bool_ptr = FALSE;
2783                 }
2784             }
2785           else
2786             {
2787               if (!g_markup_parse_boolean (value, va_arg (ap, gboolean *)))
2788                 {
2789                   g_set_error (error, G_MARKUP_ERROR,
2790                                G_MARKUP_ERROR_INVALID_CONTENT,
2791                                "element '%s', attribute '%s', value '%s' "
2792                                "cannot be parsed as a boolean value",
2793                                element_name, attr, value);
2794
2795                   va_end (ap);
2796                   goto failure;
2797                 }
2798             }
2799
2800           break;
2801
2802         default:
2803           g_assert_not_reached ();
2804         }
2805
2806       type = va_arg (ap, GMarkupCollectType);
2807       attr = va_arg (ap, const char *);
2808       written++;
2809     }
2810   va_end (ap);
2811
2812   /* ensure we collected all the arguments */
2813   for (i = 0; attribute_names[i]; i++)
2814     if ((collected & (G_GUINT64_CONSTANT(1) << i)) == 0)
2815       {
2816         /* attribute not collected:  could be caused by two things.
2817          *
2818          * 1) it doesn't exist in our list of attributes
2819          * 2) it existed but was matched by a duplicate attribute earlier
2820          *
2821          * find out.
2822          */
2823         int j;
2824
2825         for (j = 0; j < i; j++)
2826           if (strcmp (attribute_names[i], attribute_names[j]) == 0)
2827             /* duplicate! */
2828             break;
2829
2830         /* j is now the first occurrence of attribute_names[i] */
2831         if (i == j)
2832           g_set_error (error, G_MARKUP_ERROR,
2833                        G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
2834                        "attribute '%s' invalid for element '%s'",
2835                        attribute_names[i], element_name);
2836         else
2837           g_set_error (error, G_MARKUP_ERROR,
2838                        G_MARKUP_ERROR_INVALID_CONTENT,
2839                        "attribute '%s' given multiple times for element '%s'",
2840                        attribute_names[i], element_name);
2841
2842         goto failure;
2843       }
2844
2845   return TRUE;
2846
2847 failure:
2848   /* replay the above to free allocations */
2849   type = first_type;
2850   attr = first_attr;
2851
2852   va_start (ap, first_attr);
2853   while (type != G_MARKUP_COLLECT_INVALID)
2854     {
2855       gpointer ptr;
2856
2857       ptr = va_arg (ap, gpointer);
2858
2859       if (ptr != NULL)
2860         {
2861           switch (type & (G_MARKUP_COLLECT_OPTIONAL - 1))
2862             {
2863             case G_MARKUP_COLLECT_STRDUP:
2864               if (written)
2865                 g_free (*(char **) ptr);
2866
2867             case G_MARKUP_COLLECT_STRING:
2868               *(char **) ptr = NULL;
2869               break;
2870
2871             case G_MARKUP_COLLECT_BOOLEAN:
2872               *(gboolean *) ptr = FALSE;
2873               break;
2874
2875             case G_MARKUP_COLLECT_TRISTATE:
2876               *(gboolean *) ptr = -1;
2877               break;
2878             }
2879         }
2880
2881       type = va_arg (ap, GMarkupCollectType);
2882       attr = va_arg (ap, const char *);
2883     }
2884   va_end (ap);
2885
2886   return FALSE;
2887 }