glib/gmarkup.c

   1 /* gmarkup.c - Simple XML-like parser
   2  *
   3  *  Copyright 2000, 2003 Red Hat, Inc.
   4  *
   5  * GLib is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU Lesser General Public License as
   7  * published by the Free Software Foundation; either version 2 of the
   8  * License, or (at your option) any later version.
   9  *
  10  * GLib is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with GLib; see the file COPYING.LIB.  If not,
  17  * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18  *   Boston, MA 02111-1307, USA.
  19  */
  20
  21 #include "config.h"
  22
  23 #include <stdarg.h>
  24 #include <string.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27 #include <errno.h>
  28
  29 #include "glib.h"
  30 #include "glibintl.h"
  31 #include "galias.h"
  32
  33 GQuark
  34 g_markup_error_quark (void)
  35 {
  36   return g_quark_from_static_string ("g-markup-error-quark");
  37 }
  38
/* States of the markup parser's state machine, stored in
 * GMarkupParseContext.state.
 *
 * A freshly created context starts in STATE_START.  mark_error() moves
 * the context to STATE_ERROR, after which g_markup_parse_context_parse()
 * rejects any further input.
 *
 * While the context is in either of the two INSIDE_ATTRIBUTE_VALUE
 * states, text unescaping additionally normalizes tab, newline and
 * carriage return to a single space.
 */
typedef enum
{
  STATE_START, /* initial state of a new context */
  STATE_AFTER_OPEN_ANGLE,
  STATE_AFTER_CLOSE_ANGLE,
  STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
  STATE_INSIDE_OPEN_TAG_NAME,
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_AFTER_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ, /* presumably a single-quoted value -- confirm in the parser */
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ, /* presumably a double-quoted value -- confirm in the parser */
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH,
  STATE_INSIDE_CLOSE_TAG_NAME,
  STATE_AFTER_CLOSE_TAG_NAME,
  STATE_INSIDE_PASSTHROUGH,
  STATE_ERROR /* set by mark_error(); no more input is accepted */
} GMarkupParseState;
  59
/* Complete state of one incremental markup parse.  Created by
 * g_markup_parse_context_new() and released by
 * g_markup_parse_context_free().
 */
struct _GMarkupParseContext
{
  /* Callback table supplied by the caller; not owned by the context. */
  const GMarkupParser *parser;

  /* Flags given at creation; G_MARKUP_PREFIX_ERROR_POSITION is
   * consulted when propagating errors.
   */
  GMarkupParseFlags flags;

  /* Current position, both starting at 1; maintained by advance_char()
   * and reported in error messages.
   */
  gint line_number;
  gint char_number;

  /* Opaque pointer handed to the parser callbacks, and the optional
   * notifier invoked on it when the context is freed.
   */
  gpointer user_data;
  GDestroyNotify dnotify;

  /* A piece of character data or an element that
   * hasn't "ended" yet so we haven't yet called
   * the callback for it.
   */
  GString *partial_chunk;

  GMarkupParseState state;

  /* List whose element data are g_free()'d on context destruction;
   * the head is what current_element() returns.
   */
  GSList *tag_stack;

  /* Parallel NULL-terminated arrays of attribute names and values,
   * grown by add_attribute().  cur_attr is the index of the most
   * recently added attribute (-1 when there is none) and alloc_attrs
   * is the number of slots currently allocated in each array.
   */
  gchar **attr_names;
  gchar **attr_values;
  gint cur_attr;
  gint alloc_attrs;

  /* The chunk passed to the current g_markup_parse_context_parse()
   * call.  current_text_end is computed by find_current_text_end()
   * and excludes a trailing incomplete UTF-8 character, if any.
   */
  const gchar *current_text;
  gssize       current_text_len;
  const gchar *current_text_end;

  /* Bytes of an incomplete UTF-8 character found at the end of a
   * chunk; completed with the start of the next chunk.  NULL when
   * there is nothing left over.
   */
  GString *leftover_char_portion;

  /* used to save the start of the last interesting thingy */
  const gchar *start;

  /* Current read position inside current_text. */
  const gchar *iter;

  /* Initialised to TRUE; where it is cleared is outside this excerpt. */
  guint document_empty : 1;
  /* TRUE while g_markup_parse_context_parse() is running; both parse
   * and free refuse to run while it is set.
   */
  guint parsing : 1;
  /* Initialised to 0; NOTE(review): usage is not visible in this
   * excerpt -- confirm its meaning against the rest of the parser.
   */
  gint balance;
};
 100
 101 /**
 102  * g_markup_parse_context_new:
 103  * @parser: a #GMarkupParser
 104  * @flags: one or more #GMarkupParseFlags
 105  * @user_data: user data to pass to #GMarkupParser functions
 106  * @user_data_dnotify: user data destroy notifier called when the parse context is freed
 107  *
 108  * Creates a new parse context. A parse context is used to parse
 109  * marked-up documents. You can feed any number of documents into
 110  * a context, as long as no errors occur; once an error occurs,
 111  * the parse context can't continue to parse text (you have to free it
 112  * and create a new parse context).
 113  *
 114  * Return value: a new #GMarkupParseContext
 115  **/
 116 GMarkupParseContext *
 117 g_markup_parse_context_new (const GMarkupParser *parser,
 118                             GMarkupParseFlags    flags,
 119                             gpointer             user_data,
 120                             GDestroyNotify       user_data_dnotify)
 121 {
 122   GMarkupParseContext *context;
 123
 124   g_return_val_if_fail (parser != NULL, NULL);
 125
 126   context = g_new (GMarkupParseContext, 1);
 127
 128   context->parser = parser;
 129   context->flags = flags;
 130   context->user_data = user_data;
 131   context->dnotify = user_data_dnotify;
 132
 133   context->line_number = 1;
 134   context->char_number = 1;
 135
 136   context->partial_chunk = NULL;
 137
 138   context->state = STATE_START;
 139   context->tag_stack = NULL;
 140   context->attr_names = NULL;
 141   context->attr_values = NULL;
 142   context->cur_attr = -1;
 143   context->alloc_attrs = 0;
 144
 145   context->current_text = NULL;
 146   context->current_text_len = -1;
 147   context->current_text_end = NULL;
 148   context->leftover_char_portion = NULL;
 149
 150   context->start = NULL;
 151   context->iter = NULL;
 152
 153   context->document_empty = TRUE;
 154   context->parsing = FALSE;
 155
 156   context->balance = 0;
 157
 158   return context;
 159 }
 160
 161 /**
 162  * g_markup_parse_context_free:
 163  * @context: a #GMarkupParseContext
 164  *
 165  * Frees a #GMarkupParseContext. Can't be called from inside
 166  * one of the #GMarkupParser functions.
 167  *
 168  **/
 169 void
 170 g_markup_parse_context_free (GMarkupParseContext *context)
 171 {
 172   g_return_if_fail (context != NULL);
 173   g_return_if_fail (!context->parsing);
 174
 175   if (context->dnotify)
 176     (* context->dnotify) (context->user_data);
 177
 178   g_strfreev (context->attr_names);
 179   g_strfreev (context->attr_values);
 180
 181   g_slist_foreach (context->tag_stack, (GFunc)g_free, NULL);
 182   g_slist_free (context->tag_stack);
 183
 184   if (context->partial_chunk)
 185     g_string_free (context->partial_chunk, TRUE);
 186
 187   if (context->leftover_char_portion)
 188     g_string_free (context->leftover_char_portion, TRUE);
 189
 190   g_free (context);
 191 }
 192
 193 static void
 194 mark_error (GMarkupParseContext *context,
 195             GError              *error)
 196 {
 197   context->state = STATE_ERROR;
 198
 199   if (context->parser->error)
 200     (*context->parser->error) (context, error, context->user_data);
 201 }
 202
 203 static void set_error (GMarkupParseContext *context,
 204                        GError             **error,
 205                        GMarkupError         code,
 206                        const gchar         *format,
 207                        ...) G_GNUC_PRINTF (4, 5);
 208
 209 static void
 210 set_error (GMarkupParseContext *context,
 211            GError             **error,
 212            GMarkupError         code,
 213            const gchar         *format,
 214            ...)
 215 {
 216   GError *tmp_error;
 217   gchar *s;
 218   gchar *s_valid;
 219   va_list args;
 220
 221   va_start (args, format);
 222   s = g_strdup_vprintf (format, args);
 223   va_end (args);
 224
 225   /* Make sure that the GError message is valid UTF-8 even if it is
 226    * complaining about invalid UTF-8 in the markup: */
 227   s_valid = _g_utf8_make_valid (s);
 228   tmp_error = g_error_new_literal (G_MARKUP_ERROR, code, s_valid);
 229
 230   g_free (s);
 231   g_free (s_valid);
 232
 233   g_prefix_error (&tmp_error,
 234                   _("Error on line %d char %d: "),
 235                   context->line_number,
 236                   context->char_number);
 237
 238   mark_error (context, tmp_error);
 239
 240   g_propagate_error (error, tmp_error);
 241 }
 242
 243 static void
 244 propagate_error (GMarkupParseContext  *context,
 245                  GError              **dest,
 246                  GError               *src)
 247 {
 248   if (context->flags & G_MARKUP_PREFIX_ERROR_POSITION)
 249     g_prefix_error (&src,
 250                     _("Error on line %d char %d: "),
 251                     context->line_number,
 252                     context->char_number);
 253
 254   mark_error (context, src);
 255
 256   g_propagate_error (dest, src);
 257 }
 258
/* Speed hack shared by is_name_start_char() and is_name_char(): they
 * try the cheap ASCII-only test first, then reject the characters that
 * usually terminate a name, and only then fall back to the expensive
 * Unicode property lookup. Nobody uses non-ASCII in XML tag/attribute
 * names, so this virtually always avoids the guniprop call.
 *
 * IS_COMMON_NAME_END_CHAR(c) is true for '=', '/', '>' and ' '.
 * Note that the argument is evaluated up to four times.
 */
#define IS_COMMON_NAME_END_CHAR(c) \
  ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
 267
 268 static gboolean
 269 is_name_start_char (const gchar *p)
 270 {
 271   if (g_ascii_isalpha (*p) ||
 272       (!IS_COMMON_NAME_END_CHAR (*p) &&
 273        (*p == '_' ||
 274         *p == ':' ||
 275         g_unichar_isalpha (g_utf8_get_char (p)))))
 276     return TRUE;
 277   else
 278     return FALSE;
 279 }
 280
 281 static gboolean
 282 is_name_char (const gchar *p)
 283 {
 284   if (g_ascii_isalnum (*p) ||
 285       (!IS_COMMON_NAME_END_CHAR (*p) &&
 286        (*p == '.' ||
 287         *p == '-' ||
 288         *p == '_' ||
 289         *p == ':' ||
 290         g_unichar_isalpha (g_utf8_get_char (p)))))
 291     return TRUE;
 292   else
 293     return FALSE;
 294 }
 295
 296
 297 static gchar*
 298 char_str (gunichar c,
 299           gchar   *buf)
 300 {
 301   memset (buf, 0, 8);
 302   g_unichar_to_utf8 (c, buf);
 303   return buf;
 304 }
 305
 306 static gchar*
 307 utf8_str (const gchar *utf8,
 308           gchar       *buf)
 309 {
 310   char_str (g_utf8_get_char (utf8), buf);
 311   return buf;
 312 }
 313
 314 static void
 315 set_unescape_error (GMarkupParseContext *context,
 316                     GError             **error,
 317                     const gchar         *remaining_text,
 318                     const gchar         *remaining_text_end,
 319                     GMarkupError         code,
 320                     const gchar         *format,
 321                     ...)
 322 {
 323   GError *tmp_error;
 324   gchar *s;
 325   va_list args;
 326   gint remaining_newlines;
 327   const gchar *p;
 328
 329   remaining_newlines = 0;
 330   p = remaining_text;
 331   while (p != remaining_text_end)
 332     {
 333       if (*p == '\n')
 334         ++remaining_newlines;
 335       ++p;
 336     }
 337
 338   va_start (args, format);
 339   s = g_strdup_vprintf (format, args);
 340   va_end (args);
 341
 342   tmp_error = g_error_new (G_MARKUP_ERROR,
 343                            code,
 344                            _("Error on line %d: %s"),
 345                            context->line_number - remaining_newlines,
 346                            s);
 347
 348   g_free (s);
 349
 350   mark_error (context, tmp_error);
 351
 352   g_propagate_error (error, tmp_error);
 353 }
 354
/* States of the small state machine driven by unescape_text(); each
 * state is handled by the matching unescape_text_state_*() function.
 */
typedef enum
{
  USTATE_INSIDE_TEXT,        /* copying ordinary text; initial state */
  USTATE_AFTER_AMPERSAND,    /* just consumed '&' */
  USTATE_INSIDE_ENTITY_NAME, /* reading a named entity up to ';' */
  USTATE_AFTER_CHARREF_HASH  /* just consumed "&#"; reading a character reference */
} UnescapeState;
 362
/* Working state shared by unescape_text() and its per-state helpers. */
typedef struct
{
  GMarkupParseContext *context; /* parse context; receives errors */
  GString *str;                 /* unescaped output accumulated so far */
  UnescapeState state;          /* current unescape state */
  const gchar *text;            /* start of the text being unescaped */
  const gchar *text_end;        /* one past the last byte of that text */
  const gchar *entity_start;    /* start of the entity name or character
                                 * reference digits being read, or NULL */
} UnescapeContext;
 372
 373 static const gchar*
 374 unescape_text_state_inside_text (UnescapeContext *ucontext,
 375                                  const gchar     *p,
 376                                  GError         **error)
 377 {
 378   const gchar *start;
 379   gboolean normalize_attribute;
 380
 381   if (ucontext->context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ ||
 382       ucontext->context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
 383     normalize_attribute = TRUE;
 384   else
 385     normalize_attribute = FALSE;
 386
 387   start = p;
 388
 389   while (p != ucontext->text_end)
 390     {
 391       if (*p == '&')
 392         {
 393           break;
 394         }
 395       else if (normalize_attribute && (*p == '\t' || *p == '\n'))
 396         {
 397           g_string_append_len (ucontext->str, start, p - start);
 398           g_string_append_c (ucontext->str, ' ');
 399           p = g_utf8_next_char (p);
 400           start = p;
 401         }
 402       else if (*p == '\r')
 403         {
 404           g_string_append_len (ucontext->str, start, p - start);
 405           g_string_append_c (ucontext->str, normalize_attribute ? ' ' : '\n');
 406           p = g_utf8_next_char (p);
 407           if (p != ucontext->text_end && *p == '\n')
 408             p = g_utf8_next_char (p);
 409           start = p;
 410         }
 411       else
 412         p = g_utf8_next_char (p);
 413     }
 414
 415   if (p != start)
 416     g_string_append_len (ucontext->str, start, p - start);
 417
 418   if (p != ucontext->text_end && *p == '&')
 419     {
 420       p = g_utf8_next_char (p);
 421       ucontext->state = USTATE_AFTER_AMPERSAND;
 422     }
 423
 424   return p;
 425 }
 426
 427 static const gchar*
 428 unescape_text_state_after_ampersand (UnescapeContext *ucontext,
 429                                      const gchar     *p,
 430                                      GError         **error)
 431 {
 432   ucontext->entity_start = NULL;
 433
 434   if (*p == '#')
 435     {
 436       p = g_utf8_next_char (p);
 437
 438       ucontext->entity_start = p;
 439       ucontext->state = USTATE_AFTER_CHARREF_HASH;
 440     }
 441   else if (!is_name_start_char (p))
 442     {
 443       if (*p == ';')
 444         {
 445           set_unescape_error (ucontext->context, error,
 446                               p, ucontext->text_end,
 447                               G_MARKUP_ERROR_PARSE,
 448                               _("Empty entity '&;' seen; valid "
 449                                 "entities are: &amp; &quot; &lt; &gt; &apos;"));
 450         }
 451       else
 452         {
 453           gchar buf[8];
 454
 455           set_unescape_error (ucontext->context, error,
 456                               p, ucontext->text_end,
 457                               G_MARKUP_ERROR_PARSE,
 458                               _("Character '%s' is not valid at "
 459                                 "the start of an entity name; "
 460                                 "the & character begins an entity; "
 461                                 "if this ampersand isn't supposed "
 462                                 "to be an entity, escape it as "
 463                                 "&amp;"),
 464                               utf8_str (p, buf));
 465         }
 466     }
 467   else
 468     {
 469       ucontext->entity_start = p;
 470       ucontext->state = USTATE_INSIDE_ENTITY_NAME;
 471     }
 472
 473   return p;
 474 }
 475
 476 static const gchar*
 477 unescape_text_state_inside_entity_name (UnescapeContext *ucontext,
 478                                         const gchar     *p,
 479                                         GError         **error)
 480 {
 481   while (p != ucontext->text_end)
 482     {
 483       if (*p == ';')
 484         break;
 485       else if (!is_name_char (p))
 486         {
 487           gchar ubuf[8];
 488
 489           set_unescape_error (ucontext->context, error,
 490                               p, ucontext->text_end,
 491                               G_MARKUP_ERROR_PARSE,
 492                               _("Character '%s' is not valid "
 493                                 "inside an entity name"),
 494                               utf8_str (p, ubuf));
 495           break;
 496         }
 497
 498       p = g_utf8_next_char (p);
 499     }
 500
 501   if (ucontext->context->state != STATE_ERROR)
 502     {
 503       if (p != ucontext->text_end)
 504         {
 505           gint len = p - ucontext->entity_start;
 506
 507           /* move to after semicolon */
 508           p = g_utf8_next_char (p);
 509           ucontext->state = USTATE_INSIDE_TEXT;
 510
 511           if (strncmp (ucontext->entity_start, "lt", len) == 0)
 512             g_string_append_c (ucontext->str, '<');
 513           else if (strncmp (ucontext->entity_start, "gt", len) == 0)
 514             g_string_append_c (ucontext->str, '>');
 515           else if (strncmp (ucontext->entity_start, "amp", len) == 0)
 516             g_string_append_c (ucontext->str, '&');
 517           else if (strncmp (ucontext->entity_start, "quot", len) == 0)
 518             g_string_append_c (ucontext->str, '"');
 519           else if (strncmp (ucontext->entity_start, "apos", len) == 0)
 520             g_string_append_c (ucontext->str, '\'');
 521           else
 522             {
 523               gchar *name;
 524
 525               name = g_strndup (ucontext->entity_start, len);
 526               set_unescape_error (ucontext->context, error,
 527                                   p, ucontext->text_end,
 528                                   G_MARKUP_ERROR_PARSE,
 529                                   _("Entity name '%s' is not known"),
 530                                   name);
 531               g_free (name);
 532             }
 533         }
 534       else
 535         {
 536           set_unescape_error (ucontext->context, error,
 537                               /* give line number of the & */
 538                               ucontext->entity_start, ucontext->text_end,
 539                               G_MARKUP_ERROR_PARSE,
 540                               _("Entity did not end with a semicolon; "
 541                                 "most likely you used an ampersand "
 542                                 "character without intending to start "
 543                                 "an entity - escape ampersand as &amp;"));
 544         }
 545     }
 546 #undef MAX_ENT_LEN
 547
 548   return p;
 549 }
 550
 551 static const gchar*
 552 unescape_text_state_after_charref_hash (UnescapeContext *ucontext,
 553                                         const gchar     *p,
 554                                         GError         **error)
 555 {
 556   gboolean is_hex = FALSE;
 557   const char *start;
 558
 559   start = ucontext->entity_start;
 560
 561   if (*p == 'x')
 562     {
 563       is_hex = TRUE;
 564       p = g_utf8_next_char (p);
 565       start = p;
 566     }
 567
 568   while (p != ucontext->text_end && *p != ';')
 569     p = g_utf8_next_char (p);
 570
 571   if (p != ucontext->text_end)
 572     {
 573       g_assert (*p == ';');
 574
 575       /* digit is between start and p */
 576
 577       if (start != p)
 578         {
 579           gulong l;
 580           gchar *end = NULL;
 581
 582           errno = 0;
 583           if (is_hex)
 584             l = strtoul (start, &end, 16);
 585           else
 586             l = strtoul (start, &end, 10);
 587
 588           if (end != p || errno != 0)
 589             {
 590               set_unescape_error (ucontext->context, error,
 591                                   start, ucontext->text_end,
 592                                   G_MARKUP_ERROR_PARSE,
 593                                   _("Failed to parse '%-.*s', which "
 594                                     "should have been a digit "
 595                                     "inside a character reference "
 596                                     "(&#234; for example) - perhaps "
 597                                     "the digit is too large"),
 598                                   p - start, start);
 599             }
 600           else
 601             {
 602               /* characters XML permits */
 603               if (l == 0x9 ||
 604                   l == 0xA ||
 605                   l == 0xD ||
 606                   (l >= 0x20 && l <= 0xD7FF) ||
 607                   (l >= 0xE000 && l <= 0xFFFD) ||
 608                   (l >= 0x10000 && l <= 0x10FFFF))
 609                 {
 610                   gchar buf[8];
 611                   g_string_append (ucontext->str, char_str (l, buf));
 612                 }
 613               else
 614                 {
 615                   set_unescape_error (ucontext->context, error,
 616                                       start, ucontext->text_end,
 617                                       G_MARKUP_ERROR_PARSE,
 618                                       _("Character reference '%-.*s' does not "
 619                                         "encode a permitted character"),
 620                                       p - start, start);
 621                 }
 622             }
 623
 624           /* Move to next state */
 625           p = g_utf8_next_char (p); /* past semicolon */
 626           ucontext->state = USTATE_INSIDE_TEXT;
 627         }
 628       else
 629         {
 630           set_unescape_error (ucontext->context, error,
 631                               start, ucontext->text_end,
 632                               G_MARKUP_ERROR_PARSE,
 633                               _("Empty character reference; "
 634                                 "should include a digit such as "
 635                                 "&#454;"));
 636         }
 637     }
 638   else
 639     {
 640       set_unescape_error (ucontext->context, error,
 641                           start, ucontext->text_end,
 642                           G_MARKUP_ERROR_PARSE,
 643                           _("Character reference did not end with a "
 644                             "semicolon; "
 645                             "most likely you used an ampersand "
 646                             "character without intending to start "
 647                             "an entity - escape ampersand as &amp;"));
 648     }
 649
 650   return p;
 651 }
 652
 653 static gboolean
 654 unescape_text (GMarkupParseContext *context,
 655                const gchar         *text,
 656                const gchar         *text_end,
 657                GString            **unescaped,
 658                GError             **error)
 659 {
 660   UnescapeContext ucontext;
 661   const gchar *p;
 662
 663   ucontext.context = context;
 664   ucontext.text = text;
 665   ucontext.text_end = text_end;
 666   ucontext.entity_start = NULL;
 667
 668   ucontext.str = g_string_sized_new (text_end - text);
 669
 670   ucontext.state = USTATE_INSIDE_TEXT;
 671   p = text;
 672
 673   while (p != text_end && context->state != STATE_ERROR)
 674     {
 675       g_assert (p < text_end);
 676
 677       switch (ucontext.state)
 678         {
 679         case USTATE_INSIDE_TEXT:
 680           {
 681             p = unescape_text_state_inside_text (&ucontext,
 682                                                  p,
 683                                                  error);
 684           }
 685           break;
 686
 687         case USTATE_AFTER_AMPERSAND:
 688           {
 689             p = unescape_text_state_after_ampersand (&ucontext,
 690                                                      p,
 691                                                      error);
 692           }
 693           break;
 694
 695
 696         case USTATE_INSIDE_ENTITY_NAME:
 697           {
 698             p = unescape_text_state_inside_entity_name (&ucontext,
 699                                                         p,
 700                                                         error);
 701           }
 702           break;
 703
 704         case USTATE_AFTER_CHARREF_HASH:
 705           {
 706             p = unescape_text_state_after_charref_hash (&ucontext,
 707                                                         p,
 708                                                         error);
 709           }
 710           break;
 711
 712         default:
 713           g_assert_not_reached ();
 714           break;
 715         }
 716     }
 717
 718   if (context->state != STATE_ERROR)
 719     {
 720       switch (ucontext.state)
 721         {
 722         case USTATE_INSIDE_TEXT:
 723           break;
 724         case USTATE_AFTER_AMPERSAND:
 725         case USTATE_INSIDE_ENTITY_NAME:
 726           set_unescape_error (context, error,
 727                               NULL, NULL,
 728                               G_MARKUP_ERROR_PARSE,
 729                               _("Unfinished entity reference"));
 730           break;
 731         case USTATE_AFTER_CHARREF_HASH:
 732           set_unescape_error (context, error,
 733                               NULL, NULL,
 734                               G_MARKUP_ERROR_PARSE,
 735                               _("Unfinished character reference"));
 736           break;
 737         }
 738     }
 739
 740   if (context->state == STATE_ERROR)
 741     {
 742       g_string_free (ucontext.str, TRUE);
 743       *unescaped = NULL;
 744       return FALSE;
 745     }
 746   else
 747     {
 748       *unescaped = ucontext.str;
 749       return TRUE;
 750     }
 751 }
 752
 753 static inline gboolean
 754 advance_char (GMarkupParseContext *context)
 755 {
 756   context->iter = g_utf8_next_char (context->iter);
 757   context->char_number += 1;
 758
 759   if (context->iter == context->current_text_end)
 760     {
 761       return FALSE;
 762     }
 763   else if (*context->iter == '\n')
 764     {
 765       context->line_number += 1;
 766       context->char_number = 1;
 767     }
 768
 769   return TRUE;
 770 }
 771
 772 static inline gboolean
 773 xml_isspace (char c)
 774 {
 775   return c == ' ' || c == '\t' || c == '\n' || c == '\r';
 776 }
 777
 778 static void
 779 skip_spaces (GMarkupParseContext *context)
 780 {
 781   do
 782     {
 783       if (!xml_isspace (*context->iter))
 784         return;
 785     }
 786   while (advance_char (context));
 787 }
 788
 789 static void
 790 advance_to_name_end (GMarkupParseContext *context)
 791 {
 792   do
 793     {
 794       if (!is_name_char (context->iter))
 795         return;
 796     }
 797   while (advance_char (context));
 798 }
 799
 800 static void
 801 add_to_partial (GMarkupParseContext *context,
 802                 const gchar         *text_start,
 803                 const gchar         *text_end)
 804 {
 805   if (context->partial_chunk == NULL)
 806     context->partial_chunk = g_string_sized_new (text_end - text_start);
 807
 808   if (text_start != text_end)
 809     g_string_append_len (context->partial_chunk, text_start,
 810                          text_end - text_start);
 811
 812   /* Invariant here that partial_chunk exists */
 813 }
 814
 815 static void
 816 truncate_partial (GMarkupParseContext *context)
 817 {
 818   if (context->partial_chunk != NULL)
 819     {
 820       context->partial_chunk = g_string_truncate (context->partial_chunk, 0);
 821     }
 822 }
 823
 824 static const gchar*
 825 current_element (GMarkupParseContext *context)
 826 {
 827   return context->tag_stack->data;
 828 }
 829
 830 static const gchar*
 831 current_attribute (GMarkupParseContext *context)
 832 {
 833   g_assert (context->cur_attr >= 0);
 834   return context->attr_names[context->cur_attr];
 835 }
 836
 837 static void
 838 find_current_text_end (GMarkupParseContext *context)
 839 {
 840   /* This function must be safe (non-segfaulting) on invalid UTF8.
 841    * It assumes the string starts with a character start
 842    */
 843   const gchar *end = context->current_text + context->current_text_len;
 844   const gchar *p;
 845   const gchar *next;
 846
 847   g_assert (context->current_text_len > 0);
 848
 849   p = g_utf8_find_prev_char (context->current_text, end);
 850
 851   g_assert (p != NULL); /* since current_text was a char start */
 852
 853   /* p is now the start of the last character or character portion. */
 854   g_assert (p != end);
 855   next = g_utf8_next_char (p); /* this only touches *p, nothing beyond */
 856
 857   if (next == end)
 858     {
 859       /* whole character */
 860       context->current_text_end = end;
 861     }
 862   else
 863     {
 864       /* portion */
 865       context->leftover_char_portion = g_string_new_len (p, end - p);
 866       context->current_text_len -= (end - p);
 867       context->current_text_end = p;
 868     }
 869 }
 870
 871
 872 static void
 873 add_attribute (GMarkupParseContext *context, char *name)
 874 {
 875   if (context->cur_attr + 2 >= context->alloc_attrs)
 876     {
 877       context->alloc_attrs += 5; /* silly magic number */
 878       context->attr_names = g_realloc (context->attr_names, sizeof(char*)*context->alloc_attrs);
 879       context->attr_values = g_realloc (context->attr_values, sizeof(char*)*context->alloc_attrs);
 880     }
 881   context->cur_attr++;
 882   context->attr_names[context->cur_attr] = name;
 883   context->attr_values[context->cur_attr] = NULL;
 884   context->attr_names[context->cur_attr+1] = NULL;
 885   context->attr_values[context->cur_attr+1] = NULL;
 886 }
 887
 888 /**
 889  * g_markup_parse_context_parse:
 890  * @context: a #GMarkupParseContext
 891  * @text: chunk of text to parse
 892  * @text_len: length of @text in bytes
 893  * @error: return location for a #GError
 894  *
 895  * Feed some data to the #GMarkupParseContext. The data need not
 896  * be valid UTF-8; an error will be signaled if it's invalid.
 897  * The data need not be an entire document; you can feed a document
 898  * into the parser incrementally, via multiple calls to this function.
 899  * Typically, as you receive data from a network connection or file,
 900  * you feed each received chunk of data into this function, aborting
 901  * the process if an error occurs. Once an error is reported, no further
 902  * data may be fed to the #GMarkupParseContext; all errors are fatal.
 903  *
 904  * Return value: %FALSE if an error occurred, %TRUE on success
 905  **/
 906 gboolean
 907 g_markup_parse_context_parse (GMarkupParseContext *context,
 908                               const gchar         *text,
 909                               gssize               text_len,
 910                               GError             **error)
 911 {
 912   const gchar *first_invalid;
 913
 914   g_return_val_if_fail (context != NULL, FALSE);
 915   g_return_val_if_fail (text != NULL, FALSE);
 916   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
 917   g_return_val_if_fail (!context->parsing, FALSE);
 918
 919   if (text_len < 0)
 920     text_len = strlen (text);
 921
 922   if (text_len == 0)
 923     return TRUE;
 924
 925   context->parsing = TRUE;
 926
 927   if (context->leftover_char_portion)
 928     {
 929       const gchar *first_char;
 930
 931       if ((*text & 0xc0) != 0x80)
 932         first_char = text;
 933       else
 934         first_char = g_utf8_find_next_char (text, text + text_len);
 935
 936       if (first_char)
 937         {
 938           /* leftover_char_portion was completed. Parse it. */
 939           GString *portion = context->leftover_char_portion;
 940
 941           g_string_append_len (context->leftover_char_portion,
 942                                text, first_char - text);
 943
 944           /* hacks to allow recursion */
 945           context->parsing = FALSE;
 946           context->leftover_char_portion = NULL;
 947
 948           if (!g_markup_parse_context_parse (context,
 949                                              portion->str, portion->len,
 950                                              error))
 951             {
 952               g_assert (context->state == STATE_ERROR);
 953             }
 954
 955           g_string_free (portion, TRUE);
 956           context->parsing = TRUE;
 957
 958           /* Skip the fraction of char that was in this text */
 959           text_len -= (first_char - text);
 960           text = first_char;
 961         }
 962       else
 963         {
 964           /* another little chunk of the leftover char; geez
 965            * someone is inefficient.
 966            */
 967           g_string_append_len (context->leftover_char_portion,
 968                                text, text_len);
 969
 970           if (context->leftover_char_portion->len > 7)
 971             {
 972               /* The leftover char portion is too big to be
 973                * a UTF-8 character
 974                */
 975               set_error (context,
 976                          error,
 977                          G_MARKUP_ERROR_BAD_UTF8,
 978                          _("Invalid UTF-8 encoded text - overlong sequence"));
 979             }
 980
 981           goto finished;
 982         }
 983     }
 984
 985   context->current_text = text;
 986   context->current_text_len = text_len;
 987   context->iter = context->current_text;
 988   context->start = context->iter;
 989
 990   /* Nothing left after finishing the leftover char, or nothing
 991    * passed in to begin with.
 992    */
 993   if (context->current_text_len == 0)
 994     goto finished;
 995
 996   /* find_current_text_end () assumes the string starts at
 997    * a character start, so we need to validate at least
 998    * that much. It doesn't assume any following bytes
 999    * are valid.
1000    */
1001   if ((*context->current_text & 0xc0) == 0x80) /* not a char start */
1002     {
1003       set_error (context,
1004                  error,
1005                  G_MARKUP_ERROR_BAD_UTF8,
1006                  _("Invalid UTF-8 encoded text - not a start char"));
1007       goto finished;
1008     }
1009
1010   /* Initialize context->current_text_end, possibly adjusting
1011    * current_text_len, and add any leftover char portion
1012    */
1013   find_current_text_end (context);
1014
1015   /* Validate UTF8 (must be done after we find the end, since
1016    * we could have a trailing incomplete char)
1017    */
1018   if (!g_utf8_validate (context->current_text,
1019                         context->current_text_len,
1020                         &first_invalid))
1021     {
1022       gint newlines = 0;
1023       const gchar *p, *q;
1024       q = p = context->current_text;
1025       while (p != first_invalid)
1026         {
1027           if (*p == '\n')
1028             {
1029               ++newlines;
1030               q = p + 1;
1031               context->char_number = 1;
1032             }
1033           ++p;
1034         }
1035
1036       context->line_number += newlines;
1037       context->char_number += g_utf8_strlen (q, first_invalid - q);
1038
1039       set_error (context,
1040                  error,
1041                  G_MARKUP_ERROR_BAD_UTF8,
1042                  _("Invalid UTF-8 encoded text - not valid '%s'"),
1043                  g_strndup (context->current_text,
1044                             context->current_text_len));
1045       goto finished;
1046     }
1047
1048   while (context->iter != context->current_text_end)
1049     {
1050       switch (context->state)
1051         {
1052         case STATE_START:
1053           /* Possible next state: AFTER_OPEN_ANGLE */
1054
1055           g_assert (context->tag_stack == NULL);
1056
1057           /* whitespace is ignored outside of any elements */
1058           skip_spaces (context);
1059
1060           if (context->iter != context->current_text_end)
1061             {
1062               if (*context->iter == '<')
1063                 {
1064                   /* Move after the open angle */
1065                   advance_char (context);
1066
1067                   context->state = STATE_AFTER_OPEN_ANGLE;
1068
1069                   /* this could start a passthrough */
1070                   context->start = context->iter;
1071
1072                   /* document is now non-empty */
1073                   context->document_empty = FALSE;
1074                 }
1075               else
1076                 {
1077                   set_error (context,
1078                              error,
1079                              G_MARKUP_ERROR_PARSE,
1080                              _("Document must begin with an element (e.g. <book>)"));
1081                 }
1082             }
1083           break;
1084
1085         case STATE_AFTER_OPEN_ANGLE:
1086           /* Possible next states: INSIDE_OPEN_TAG_NAME,
1087            *  AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
1088            */
1089           if (*context->iter == '?' ||
1090               *context->iter == '!')
1091             {
1092               /* include < in the passthrough */
1093               const gchar *openangle = "<";
1094               add_to_partial (context, openangle, openangle + 1);
1095               context->start = context->iter;
1096               context->balance = 1;
1097               context->state = STATE_INSIDE_PASSTHROUGH;
1098             }
1099           else if (*context->iter == '/')
1100             {
1101               /* move after it */
1102               advance_char (context);
1103
1104               context->state = STATE_AFTER_CLOSE_TAG_SLASH;
1105             }
1106           else if (is_name_start_char (context->iter))
1107             {
1108               context->state = STATE_INSIDE_OPEN_TAG_NAME;
1109
1110               /* start of tag name */
1111               context->start = context->iter;
1112             }
1113           else
1114             {
1115               gchar buf[8];
1116
1117               set_error (context,
1118                          error,
1119                          G_MARKUP_ERROR_PARSE,
1120                          _("'%s' is not a valid character following "
1121                            "a '<' character; it may not begin an "
1122                            "element name"),
1123                          utf8_str (context->iter, buf));
1124             }
1125           break;
1126
1127           /* The AFTER_CLOSE_ANGLE state is actually sort of
1128            * broken, because it doesn't correspond to a range
1129            * of characters in the input stream as the others do,
1130            * and thus makes things harder to conceptualize
1131            */
1132         case STATE_AFTER_CLOSE_ANGLE:
1133           /* Possible next states: INSIDE_TEXT, STATE_START */
1134           if (context->tag_stack == NULL)
1135             {
1136               context->start = NULL;
1137               context->state = STATE_START;
1138             }
1139           else
1140             {
1141               context->start = context->iter;
1142               context->state = STATE_INSIDE_TEXT;
1143             }
1144           break;
1145
1146         case STATE_AFTER_ELISION_SLASH:
1147           /* Possible next state: AFTER_CLOSE_ANGLE */
1148
1149           {
1150             /* We need to pop the tag stack and call the end_element
1151              * function, since this is the close tag
1152              */
1153             GError *tmp_error = NULL;
1154
1155             g_assert (context->tag_stack != NULL);
1156
1157             tmp_error = NULL;
1158             if (context->parser->end_element)
1159               (* context->parser->end_element) (context,
1160                                                 context->tag_stack->data,
1161                                                 context->user_data,
1162                                                 &tmp_error);
1163
1164             if (tmp_error)
1165               {
1166                 mark_error (context, tmp_error);
1167                 g_propagate_error (error, tmp_error);
1168               }
1169             else
1170               {
1171                 if (*context->iter == '>')
1172                   {
1173                     /* move after the close angle */
1174                     advance_char (context);
1175                     context->state = STATE_AFTER_CLOSE_ANGLE;
1176                   }
1177                 else
1178                   {
1179                     gchar buf[8];
1180
1181                     set_error (context,
1182                                error,
1183                                G_MARKUP_ERROR_PARSE,
1184                                _("Odd character '%s', expected a '>' character "
1185                                  "to end the start tag of element '%s'"),
1186                                utf8_str (context->iter, buf),
1187                                current_element (context));
1188                   }
1189               }
1190
1191             g_free (context->tag_stack->data);
1192             context->tag_stack = g_slist_delete_link (context->tag_stack,
1193                                                       context->tag_stack);
1194           }
1195           break;
1196
1197         case STATE_INSIDE_OPEN_TAG_NAME:
1198           /* Possible next states: BETWEEN_ATTRIBUTES */
1199
1200           /* if there's a partial chunk then it's the first part of the
1201            * tag name. If there's a context->start then it's the start
1202            * of the tag name in current_text, the partial chunk goes
1203            * before that start though.
1204            */
1205           advance_to_name_end (context);
1206
1207           if (context->iter == context->current_text_end)
1208             {
1209               /* The name hasn't necessarily ended. Merge with
1210                * partial chunk, leave state unchanged.
1211                */
1212               add_to_partial (context, context->start, context->iter);
1213             }
1214           else
1215             {
1216               /* The name has ended. Combine it with the partial chunk
1217                * if any; push it on the stack; enter next state.
1218                */
1219               add_to_partial (context, context->start, context->iter);
1220               context->tag_stack =
1221                 g_slist_prepend (context->tag_stack,
1222                                  g_string_free (context->partial_chunk,
1223                                                 FALSE));
1224
1225               context->partial_chunk = NULL;
1226
1227               context->state = STATE_BETWEEN_ATTRIBUTES;
1228               context->start = NULL;
1229             }
1230           break;
1231
1232         case STATE_INSIDE_ATTRIBUTE_NAME:
1233           /* Possible next states: AFTER_ATTRIBUTE_NAME */
1234
1235           advance_to_name_end (context);
1236           add_to_partial (context, context->start, context->iter);
1237
1238           /* read the full name, if we enter the equals sign state
1239            * then add the attribute to the list (without the value),
1240            * otherwise store a partial chunk to be prepended later.
1241            */
1242           if (context->iter != context->current_text_end)
1243             context->state = STATE_AFTER_ATTRIBUTE_NAME;
1244           break;
1245
1246         case STATE_AFTER_ATTRIBUTE_NAME:
1247           /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1248
1249           skip_spaces (context);
1250
1251           if (context->iter != context->current_text_end)
1252             {
              /* The attribute name has ended. Hand the accumulated
               * partial chunk to add_attribute(); enter next state.
               */
1256               add_attribute (context, g_string_free (context->partial_chunk, FALSE));
1257
1258               context->partial_chunk = NULL;
1259               context->start = NULL;
1260
1261               if (*context->iter == '=')
1262                 {
1263                   advance_char (context);
1264                   context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1265                 }
1266               else
1267                 {
1268                   gchar buf[8];
1269
1270                   set_error (context,
1271                              error,
1272                              G_MARKUP_ERROR_PARSE,
1273                              _("Odd character '%s', expected a '=' after "
1274                                "attribute name '%s' of element '%s'"),
1275                              utf8_str (context->iter, buf),
1276                              current_attribute (context),
1277                              current_element (context));
1278
1279                 }
1280             }
1281           break;
1282
1283         case STATE_BETWEEN_ATTRIBUTES:
1284           /* Possible next states: AFTER_CLOSE_ANGLE,
1285            * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1286            */
1287           skip_spaces (context);
1288
1289           if (context->iter != context->current_text_end)
1290             {
1291               if (*context->iter == '/')
1292                 {
1293                   advance_char (context);
1294                   context->state = STATE_AFTER_ELISION_SLASH;
1295                 }
1296               else if (*context->iter == '>')
1297                 {
1298
1299                   advance_char (context);
1300                   context->state = STATE_AFTER_CLOSE_ANGLE;
1301                 }
1302               else if (is_name_start_char (context->iter))
1303                 {
1304                   context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1305                   /* start of attribute name */
1306                   context->start = context->iter;
1307                 }
1308               else
1309                 {
1310                   gchar buf[8];
1311
1312                   set_error (context,
1313                              error,
1314                              G_MARKUP_ERROR_PARSE,
1315                              _("Odd character '%s', expected a '>' or '/' "
1316                                "character to end the start tag of "
1317                                "element '%s', or optionally an attribute; "
1318                                "perhaps you used an invalid character in "
1319                                "an attribute name"),
1320                              utf8_str (context->iter, buf),
1321                              current_element (context));
1322                 }
1323
1324               /* If we're done with attributes, invoke
1325                * the start_element callback
1326                */
1327               if (context->state == STATE_AFTER_ELISION_SLASH ||
1328                   context->state == STATE_AFTER_CLOSE_ANGLE)
1329                 {
1330                   const gchar *start_name;
1331                   /* Ugly, but the current code expects an empty array instead of NULL */
1332                   const gchar *empty = NULL;
1333                   const gchar **attr_names =  &empty;
1334                   const gchar **attr_values = &empty;
1335                   GError *tmp_error;
1336
1337                   /* Call user callback for element start */
1338                   start_name = current_element (context);
1339
1340                   if (context->cur_attr >= 0)
1341                     {
1342                       attr_names = (const gchar**)context->attr_names;
1343                       attr_values = (const gchar**)context->attr_values;
1344                     }
1345
1346                   tmp_error = NULL;
1347                   if (context->parser->start_element)
1348                     (* context->parser->start_element) (context,
1349                                                         start_name,
1350                                                         (const gchar **)attr_names,
1351                                                         (const gchar **)attr_values,
1352                                                         context->user_data,
1353                                                         &tmp_error);
1354
1355                   /* Go ahead and free the attributes. */
1356                   for (; context->cur_attr >= 0; context->cur_attr--)
1357                     {
1358                       int pos = context->cur_attr;
1359                       g_free (context->attr_names[pos]);
1360                       g_free (context->attr_values[pos]);
1361                       context->attr_names[pos] = context->attr_values[pos] = NULL;
1362                     }
1363                   g_assert (context->cur_attr == -1);
1364                   g_assert (context->attr_names == NULL ||
1365                             context->attr_names[0] == NULL);
1366                   g_assert (context->attr_values == NULL ||
1367                             context->attr_values[0] == NULL);
1368
1369                   if (tmp_error != NULL)
1370                     propagate_error (context, error, tmp_error);
1371                 }
1372             }
1373           break;
1374
1375         case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1376           /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
1377
1378           skip_spaces (context);
1379
1380           if (context->iter != context->current_text_end)
1381             {
1382               if (*context->iter == '"')
1383                 {
1384                   advance_char (context);
1385                   context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
1386                   context->start = context->iter;
1387                 }
1388               else if (*context->iter == '\'')
1389                 {
1390                   advance_char (context);
1391                   context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
1392                   context->start = context->iter;
1393                 }
1394               else
1395                 {
1396                   gchar buf[8];
1397
1398                   set_error (context,
1399                              error,
1400                              G_MARKUP_ERROR_PARSE,
1401                              _("Odd character '%s', expected an open quote mark "
1402                                "after the equals sign when giving value for "
1403                                "attribute '%s' of element '%s'"),
1404                              utf8_str (context->iter, buf),
1405                              current_attribute (context),
1406                              current_element (context));
1407                 }
1408             }
1409           break;
1410
1411         case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1412         case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1413           /* Possible next states: BETWEEN_ATTRIBUTES */
1414           {
1415             gchar delim;
1416
1417             if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
1418               {
1419                 delim = '\'';
1420               }
1421             else
1422               {
1423                 delim = '"';
1424               }
1425
1426             do
1427               {
1428                 if (*context->iter == delim)
1429                   break;
1430               }
1431             while (advance_char (context));
1432           }
1433           if (context->iter == context->current_text_end)
1434             {
1435               /* The value hasn't necessarily ended. Merge with
1436                * partial chunk, leave state unchanged.
1437                */
1438               add_to_partial (context, context->start, context->iter);
1439             }
1440           else
1441             {
1442               /* The value has ended at the quote mark. Combine it
1443                * with the partial chunk if any; set it for the current
1444                * attribute.
1445                */
1446               GString *unescaped;
1447
1448               add_to_partial (context, context->start, context->iter);
1449
1450               g_assert (context->cur_attr >= 0);
1451
1452               if (unescape_text (context,
1453                                  context->partial_chunk->str,
1454                                  context->partial_chunk->str +
1455                                  context->partial_chunk->len,
1456                                  &unescaped,
1457                                  error))
1458                 {
1459                   /* success, advance past quote and set state. */
1460                   context->attr_values[context->cur_attr] = g_string_free (unescaped, FALSE);
1461                   advance_char (context);
1462                   context->state = STATE_BETWEEN_ATTRIBUTES;
1463                   context->start = NULL;
1464                 }
1465
1466               truncate_partial (context);
1467             }
1468           break;
1469
1470         case STATE_INSIDE_TEXT:
1471           /* Possible next states: AFTER_OPEN_ANGLE */
1472           do
1473             {
1474               if (*context->iter == '<')
1475                 break;
1476             }
1477           while (advance_char (context));
1478
1479           /* The text hasn't necessarily ended. Merge with
1480            * partial chunk, leave state unchanged.
1481            */
1482
1483           add_to_partial (context, context->start, context->iter);
1484
1485           if (context->iter != context->current_text_end)
1486             {
1487               GString *unescaped = NULL;
1488
1489               /* The text has ended at the open angle. Call the text
1490                * callback.
1491                */
1492
1493               if (unescape_text (context,
1494                                  context->partial_chunk->str,
1495                                  context->partial_chunk->str +
1496                                  context->partial_chunk->len,
1497                                  &unescaped,
1498                                  error))
1499                 {
1500                   GError *tmp_error = NULL;
1501
1502                   if (context->parser->text)
1503                     (*context->parser->text) (context,
1504                                               unescaped->str,
1505                                               unescaped->len,
1506                                               context->user_data,
1507                                               &tmp_error);
1508
1509                   g_string_free (unescaped, TRUE);
1510
1511                   if (tmp_error == NULL)
1512                     {
1513                       /* advance past open angle and set state. */
1514                       advance_char (context);
1515                       context->state = STATE_AFTER_OPEN_ANGLE;
1516                       /* could begin a passthrough */
1517                       context->start = context->iter;
1518                     }
1519                   else
1520                     propagate_error (context, error, tmp_error);
1521                 }
1522
1523               truncate_partial (context);
1524             }
1525           break;
1526
1527         case STATE_AFTER_CLOSE_TAG_SLASH:
1528           /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1529           if (is_name_start_char (context->iter))
1530             {
1531               context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1532
1533               /* start of tag name */
1534               context->start = context->iter;
1535             }
1536           else
1537             {
1538               gchar buf[8];
1539
1540               set_error (context,
1541                          error,
1542                          G_MARKUP_ERROR_PARSE,
1543                          _("'%s' is not a valid character following "
1544                            "the characters '</'; '%s' may not begin an "
1545                            "element name"),
1546                          utf8_str (context->iter, buf),
1547                          utf8_str (context->iter, buf));
1548             }
1549           break;
1550
1551         case STATE_INSIDE_CLOSE_TAG_NAME:
1552           /* Possible next state: AFTER_CLOSE_TAG_NAME */
1553           advance_to_name_end (context);
1554           add_to_partial (context, context->start, context->iter);
1555
1556           if (context->iter != context->current_text_end)
1557             context->state = STATE_AFTER_CLOSE_TAG_NAME;
1558           break;
1559
1560         case STATE_AFTER_CLOSE_TAG_NAME:
          /* Possible next state: AFTER_CLOSE_ANGLE */
1562
1563           skip_spaces (context);
1564
1565           if (context->iter != context->current_text_end)
1566             {
1567               gchar *close_name;
1568
1569               /* The name has ended. Combine it with the partial chunk
1570                * if any; check that it matches stack top and pop
1571                * stack; invoke proper callback; enter next state.
1572                */
1573               close_name = g_string_free (context->partial_chunk, FALSE);
1574               context->partial_chunk = NULL;
1575
1576               if (*context->iter != '>')
1577                 {
1578                   gchar buf[8];
1579
1580                   set_error (context,
1581                              error,
1582                              G_MARKUP_ERROR_PARSE,
1583                              _("'%s' is not a valid character following "
1584                                "the close element name '%s'; the allowed "
1585                                "character is '>'"),
1586                              utf8_str (context->iter, buf),
1587                              close_name);
1588                 }
1589               else if (context->tag_stack == NULL)
1590                 {
1591                   set_error (context,
1592                              error,
1593                              G_MARKUP_ERROR_PARSE,
1594                              _("Element '%s' was closed, no element "
1595                                "is currently open"),
1596                              close_name);
1597                 }
1598               else if (strcmp (close_name, current_element (context)) != 0)
1599                 {
1600                   set_error (context,
1601                              error,
1602                              G_MARKUP_ERROR_PARSE,
1603                              _("Element '%s' was closed, but the currently "
1604                                "open element is '%s'"),
1605                              close_name,
1606                              current_element (context));
1607                 }
1608               else
1609                 {
1610                   GError *tmp_error;
1611                   advance_char (context);
1612                   context->state = STATE_AFTER_CLOSE_ANGLE;
1613                   context->start = NULL;
1614
1615                   /* call the end_element callback */
1616                   tmp_error = NULL;
1617                   if (context->parser->end_element)
1618                     (* context->parser->end_element) (context,
1619                                                       close_name,
1620                                                       context->user_data,
1621                                                       &tmp_error);
1622
1623
1624                   /* Pop the tag stack */
1625                   g_free (context->tag_stack->data);
1626                   context->tag_stack = g_slist_delete_link (context->tag_stack,
1627                                                             context->tag_stack);
1628
1629                   if (tmp_error)
1630                     propagate_error (context, error, tmp_error);
1631                 }
1632
1633               g_free (close_name);
1634             }
1635           break;
1636
1637         case STATE_INSIDE_PASSTHROUGH:
1638           /* Possible next state: AFTER_CLOSE_ANGLE */
1639           do
1640             {
1641               if (*context->iter == '<')
1642                 context->balance++;
1643               if (*context->iter == '>')
1644                 {
1645                   gchar *str;
1646                   gsize len;
1647
1648                   context->balance--;
1649                   add_to_partial (context, context->start, context->iter);
1650                   context->start = context->iter;
1651
1652                   str = context->partial_chunk->str;
1653                   len = context->partial_chunk->len;
1654
1655                   if (str[1] == '?' && str[len - 1] == '?')
1656                     break;
1657                   if (strncmp (str, "<!--", 4) == 0 &&
1658                       strcmp (str + len - 2, "--") == 0)
1659                     break;
1660                   if (strncmp (str, "<![CDATA[", 9) == 0 &&
1661                       strcmp (str + len - 2, "]]") == 0)
1662                     break;
1663                   if (strncmp (str, "<!DOCTYPE", 9) == 0 &&
1664                       context->balance == 0)
1665                     break;
1666                 }
1667             }
1668           while (advance_char (context));
1669
1670           if (context->iter == context->current_text_end)
1671             {
1672               /* The passthrough hasn't necessarily ended. Merge with
1673                * partial chunk, leave state unchanged.
1674                */
1675                add_to_partial (context, context->start, context->iter);
1676             }
1677           else
1678             {
1679               /* The passthrough has ended at the close angle. Combine
1680                * it with the partial chunk if any. Call the passthrough
1681                * callback. Note that the open/close angles are
1682                * included in the text of the passthrough.
1683                */
1684               GError *tmp_error = NULL;
1685
1686               advance_char (context); /* advance past close angle */
1687               add_to_partial (context, context->start, context->iter);
1688
1689               if (context->flags & G_MARKUP_TREAT_CDATA_AS_TEXT &&
1690                   strncmp (context->partial_chunk->str, "<![CDATA[", 9) == 0)
1691                 {
1692                   if (context->parser->text)
1693                     (*context->parser->text) (context,
1694                                               context->partial_chunk->str + 9,
1695                                               context->partial_chunk->len - 12,
1696                                               context->user_data,
1697                                               &tmp_error);
1698                 }
1699               else if (context->parser->passthrough)
1700                 (*context->parser->passthrough) (context,
1701                                                  context->partial_chunk->str,
1702                                                  context->partial_chunk->len,
1703                                                  context->user_data,
1704                                                  &tmp_error);
1705
1706               truncate_partial (context);
1707
1708               if (tmp_error == NULL)
1709                 {
1710                   context->state = STATE_AFTER_CLOSE_ANGLE;
1711                   context->start = context->iter; /* could begin text */
1712                 }
1713               else
1714                 propagate_error (context, error, tmp_error);
1715             }
1716           break;
1717
1718         case STATE_ERROR:
1719           goto finished;
1720           break;
1721
1722         default:
1723           g_assert_not_reached ();
1724           break;
1725         }
1726     }
1727
1728  finished:
1729   context->parsing = FALSE;
1730
1731   return context->state != STATE_ERROR;
1732 }
1733
1734 /**
1735  * g_markup_parse_context_end_parse:
1736  * @context: a #GMarkupParseContext
1737  * @error: return location for a #GError
1738  *
1739  * Signals to the #GMarkupParseContext that all data has been
1740  * fed into the parse context with g_markup_parse_context_parse().
1741  * This function reports an error if the document isn't complete,
1742  * for example if elements are still open.
1743  *
1744  * Return value: %TRUE on success, %FALSE if an error was set
1745  **/
1746 gboolean
1747 g_markup_parse_context_end_parse (GMarkupParseContext *context,
1748                                   GError             **error)
1749 {
1750   g_return_val_if_fail (context != NULL, FALSE);
1751   g_return_val_if_fail (!context->parsing, FALSE);
1752   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1753
1754   if (context->partial_chunk != NULL)
1755     {
1756       g_string_free (context->partial_chunk, TRUE);
1757       context->partial_chunk = NULL;
1758     }
1759
1760   if (context->document_empty)
1761     {
1762       set_error (context, error, G_MARKUP_ERROR_EMPTY,
1763                  _("Document was empty or contained only whitespace"));
1764       return FALSE;
1765     }
1766
1767   context->parsing = TRUE;
1768
1769   switch (context->state)
1770     {
1771     case STATE_START:
1772       /* Nothing to do */
1773       break;
1774
1775     case STATE_AFTER_OPEN_ANGLE:
1776       set_error (context, error, G_MARKUP_ERROR_PARSE,
1777                  _("Document ended unexpectedly just after an open angle bracket '<'"));
1778       break;
1779
1780     case STATE_AFTER_CLOSE_ANGLE:
1781       if (context->tag_stack != NULL)
1782         {
1783           /* Error message the same as for INSIDE_TEXT */
1784           set_error (context, error, G_MARKUP_ERROR_PARSE,
1785                      _("Document ended unexpectedly with elements still open - "
1786                        "'%s' was the last element opened"),
1787                      current_element (context));
1788         }
1789       break;
1790
1791     case STATE_AFTER_ELISION_SLASH:
1792       set_error (context, error, G_MARKUP_ERROR_PARSE,
1793                  _("Document ended unexpectedly, expected to see a close angle "
1794                    "bracket ending the tag <%s/>"), current_element (context));
1795       break;
1796
1797     case STATE_INSIDE_OPEN_TAG_NAME:
1798       set_error (context, error, G_MARKUP_ERROR_PARSE,
1799                  _("Document ended unexpectedly inside an element name"));
1800       break;
1801
1802     case STATE_INSIDE_ATTRIBUTE_NAME:
1803     case STATE_AFTER_ATTRIBUTE_NAME:
1804       set_error (context, error, G_MARKUP_ERROR_PARSE,
1805                  _("Document ended unexpectedly inside an attribute name"));
1806       break;
1807
1808     case STATE_BETWEEN_ATTRIBUTES:
1809       set_error (context, error, G_MARKUP_ERROR_PARSE,
1810                  _("Document ended unexpectedly inside an element-opening "
1811                    "tag."));
1812       break;
1813
1814     case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1815       set_error (context, error, G_MARKUP_ERROR_PARSE,
1816                  _("Document ended unexpectedly after the equals sign "
1817                    "following an attribute name; no attribute value"));
1818       break;
1819
1820     case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1821     case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1822       set_error (context, error, G_MARKUP_ERROR_PARSE,
1823                  _("Document ended unexpectedly while inside an attribute "
1824                    "value"));
1825       break;
1826
1827     case STATE_INSIDE_TEXT:
1828       g_assert (context->tag_stack != NULL);
1829       set_error (context, error, G_MARKUP_ERROR_PARSE,
1830                  _("Document ended unexpectedly with elements still open - "
1831                    "'%s' was the last element opened"),
1832                  current_element (context));
1833       break;
1834
1835     case STATE_AFTER_CLOSE_TAG_SLASH:
1836     case STATE_INSIDE_CLOSE_TAG_NAME:
1837     case STATE_AFTER_CLOSE_TAG_NAME:
1838       set_error (context, error, G_MARKUP_ERROR_PARSE,
1839                  _("Document ended unexpectedly inside the close tag for "
1840                    "element '%s'"), current_element (context));
1841       break;
1842
1843     case STATE_INSIDE_PASSTHROUGH:
1844       set_error (context, error, G_MARKUP_ERROR_PARSE,
1845                  _("Document ended unexpectedly inside a comment or "
1846                    "processing instruction"));
1847       break;
1848
1849     case STATE_ERROR:
1850     default:
1851       g_assert_not_reached ();
1852       break;
1853     }
1854
1855   context->parsing = FALSE;
1856
1857   return context->state != STATE_ERROR;
1858 }
1859
1860 /**
1861  * g_markup_parse_context_get_element:
1862  * @context: a #GMarkupParseContext
1863  * @returns: the name of the currently open element, or %NULL
1864  *
1865  * Retrieves the name of the currently open element.
1866  *
1867  * If called from the start_element or end_element handlers this will
1868  * give the element_name as passed to those functions. For the parent
1869  * elements, see g_markup_parse_context_get_element_stack().
1870  *
1871  * Since: 2.2
1872  **/
1873 G_CONST_RETURN gchar *
1874 g_markup_parse_context_get_element (GMarkupParseContext *context)
1875 {
1876   g_return_val_if_fail (context != NULL, NULL);
1877
1878   if (context->tag_stack == NULL)
1879     return NULL;
1880   else
1881     return current_element (context);
1882 }
1883
1884 /**
1885  * g_markup_parse_context_get_element_stack:
1886  * @context: a #GMarkupParseContext
1887  *
1888  * Retrieves the element stack from the internal state of the parser.
1889  * The returned #GSList is a list of strings where the first item is
1890  * the currently open tag (as would be returned by
1891  * g_markup_parse_context_get_element()) and the next item is its
1892  * immediate parent.
1893  *
1894  * This function is intended to be used in the start_element and
1895  * end_element handlers where g_markup_parse_context_get_element()
1896  * would merely return the name of the element that is being
1897  * processed.
1898  *
1899  * Returns: the element stack, which must not be modified
1900  *
1901  * Since: 2.16
1902  **/
1903 G_CONST_RETURN GSList *
1904 g_markup_parse_context_get_element_stack (GMarkupParseContext *context)
1905 {
1906   g_return_val_if_fail (context != NULL, NULL);
1907
1908   return context->tag_stack;
1909 }
1910
1911 /**
1912  * g_markup_parse_context_get_position:
1913  * @context: a #GMarkupParseContext
1914  * @line_number: return location for a line number, or %NULL
1915  * @char_number: return location for a char-on-line number, or %NULL
1916  *
1917  * Retrieves the current line number and the number of the character on
1918  * that line. Intended for use in error messages; there are no strict
1919  * semantics for what constitutes the "current" line number other than
1920  * "the best number we could come up with for error messages."
1921  *
1922  **/
1923 void
1924 g_markup_parse_context_get_position (GMarkupParseContext *context,
1925                                      gint                *line_number,
1926                                      gint                *char_number)
1927 {
1928   g_return_if_fail (context != NULL);
1929
1930   if (line_number)
1931     *line_number = context->line_number;
1932
1933   if (char_number)
1934     *char_number = context->char_number;
1935 }
1936
1937 static void
1938 append_escaped_text (GString     *str,
1939                      const gchar *text,
1940                      gssize       length)
1941 {
1942   const gchar *p;
1943   const gchar *end;
1944   gunichar c;
1945
1946   p = text;
1947   end = text + length;
1948
1949   while (p != end)
1950     {
1951       const gchar *next;
1952       next = g_utf8_next_char (p);
1953
1954       switch (*p)
1955         {
1956         case '&':
1957           g_string_append (str, "&amp;");
1958           break;
1959
1960         case '<':
1961           g_string_append (str, "&lt;");
1962           break;
1963
1964         case '>':
1965           g_string_append (str, "&gt;");
1966           break;
1967
1968         case '\'':
1969           g_string_append (str, "&apos;");
1970           break;
1971
1972         case '"':
1973           g_string_append (str, "&quot;");
1974           break;
1975
1976         default:
1977           c = g_utf8_get_char (p);
1978           if ((0x1 <= c && c <= 0x8) ||
1979               (0xb <= c && c  <= 0xc) ||
1980               (0xe <= c && c <= 0x1f) ||
1981               (0x7f <= c && c <= 0x84) ||
1982               (0x86 <= c && c <= 0x9f))
1983             g_string_append_printf (str, "&#x%x;", c);
1984           else
1985             g_string_append_len (str, p, next - p);
1986           break;
1987         }
1988
1989       p = next;
1990     }
1991 }
1992
1993 /**
1994  * g_markup_escape_text:
1995  * @text: some valid UTF-8 text
1996  * @length: length of @text in bytes, or -1 if the text is nul-terminated
1997  *
1998  * Escapes text so that the markup parser will parse it verbatim.
1999  * Less than, greater than, ampersand, etc. are replaced with the
2000  * corresponding entities. This function would typically be used
2001  * when writing out a file to be parsed with the markup parser.
2002  *
2003  * Note that this function doesn't protect whitespace and line endings
2004  * from being processed according to the XML rules for normalization
2005  * of line endings and attribute values.
2006  *
2007  * Return value: a newly allocated string with the escaped text
2008  **/
2009 gchar*
2010 g_markup_escape_text (const gchar *text,
2011                       gssize       length)
2012 {
2013   GString *str;
2014
2015   g_return_val_if_fail (text != NULL, NULL);
2016
2017   if (length < 0)
2018     length = strlen (text);
2019
2020   /* prealloc at least as long as original text */
2021   str = g_string_sized_new (length);
2022   append_escaped_text (str, text, length);
2023
2024   return g_string_free (str, FALSE);
2025 }
2026
/* Helper for find_conversion(): if @cp points at a positional-argument
 * marker of the form "<digits>$", returns the position just after the
 * '$'; otherwise returns @cp unchanged.
 */
static const char *
skip_positional_argument (const char *cp)
{
  const char *np = cp;

  while (*np >= '0' && *np <= '9')
    np++;

  if (np != cp && *np == '$')
    return np + 1;

  return cp;
}

/**
 * find_conversion:
 * @format: a printf-style format string
 * @after: location to store a pointer to the character after
 *   the returned conversion. On a %NULL return, returns the
 *   pointer to the trailing NUL in the string
 *
 * Find the next conversion in a printf-style format string.
 * Partially based on code from printf-parser.c,
 * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
 *
 * A conversion that is cut off by the end of the string (a lone
 * trailing '%', or flags/width/precision/size with no conversion
 * character) is not reported: %NULL is returned and @after is set
 * to the trailing NUL, so it never points beyond the string.
 *
 * Return value: pointer to the next conversion in @format,
 *  or %NULL, if none.
 **/
static const char *
find_conversion (const char  *format,
                 const char **after)
{
  const char *start = format;
  const char *cp;

  while (*start != '\0' && *start != '%')
    start++;

  if (*start == '\0')
    {
      *after = start;
      return NULL;
    }

  cp = start + 1;

  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Test for positional argument.  */
  cp = skip_positional_argument (cp);

  /* Skip the flags.  */
  while (*cp == '\'' ||
         *cp == '-' ||
         *cp == '+' ||
         *cp == ' ' ||
         *cp == '#' ||
         *cp == '0')
    cp++;

  /* Skip the field width.  */
  if (*cp == '*')
    {
      cp++;

      /* Test for positional argument.  */
      cp = skip_positional_argument (cp);
    }
  else
    {
      while (*cp >= '0' && *cp <= '9')
        cp++;
    }

  /* Skip the precision.  */
  if (*cp == '.')
    {
      cp++;
      if (*cp == '*')
        {
          /* Step over the '*' itself before looking for "<digits>$";
           * otherwise the '*' would be mistaken for the conversion
           * character and "%.*s" would end one character early.
           */
          cp++;

          /* Test for positional argument.  */
          cp = skip_positional_argument (cp);
        }
      else
        {
          while (*cp >= '0' && *cp <= '9')
            cp++;
        }
    }

  /* Skip argument type/size specifiers.  */
  while (*cp == 'h' ||
         *cp == 'L' ||
         *cp == 'l' ||
         *cp == 'j' ||
         *cp == 'z' ||
         *cp == 'Z' ||
         *cp == 't')
    cp++;

  /* The string ended before the conversion character: treat the
   * incomplete specification like a lone trailing '%' instead of
   * stepping past the terminating NUL.
   */
  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Skip the conversion character.  */
  cp++;

  *after = cp;
  return start;
}
2152
2153 /**
2154  * g_markup_vprintf_escaped:
2155  * @format: printf() style format string
2156  * @args: variable argument list, similar to vprintf()
2157  *
2158  * Formats the data in @args according to @format, escaping
2159  * all string and character arguments in the fashion
2160  * of g_markup_escape_text(). See g_markup_printf_escaped().
2161  *
2162  * Return value: newly allocated result from formatting
2163  *  operation. Free with g_free().
2164  *
2165  * Since: 2.4
2166  **/
2167 char *
2168 g_markup_vprintf_escaped (const char *format,
2169                           va_list     args)
2170 {
2171   GString *format1;
2172   GString *format2;
2173   GString *result = NULL;
2174   gchar *output1 = NULL;
2175   gchar *output2 = NULL;
2176   const char *p, *op1, *op2;
2177   va_list args2;
2178
2179   /* The technique here, is that we make two format strings that
2180    * have the identical conversions in the identical order to the
2181    * original strings, but differ in the text in-between. We
2182    * then use the normal g_strdup_vprintf() to format the arguments
2183    * with the two new format strings. By comparing the results,
2184    * we can figure out what segments of the output come from
2185    * the the original format string, and what from the arguments,
2186    * and thus know what portions of the string to escape.
2187    *
2188    * For instance, for:
2189    *
2190    *  g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5);
2191    *
2192    * We form the two format strings "%sX%dX" and %sY%sY". The results
2193    * of formatting with those two strings are
2194    *
2195    * "%sX%dX" => "Susan & FredX5X"
2196    * "%sY%dY" => "Susan & FredY5Y"
2197    *
2198    * To find the span of the first argument, we find the first position
2199    * where the two arguments differ, which tells us that the first
2200    * argument formatted to "Susan & Fred". We then escape that
2201    * to "Susan &amp; Fred" and join up with the intermediate portions
2202    * of the format string and the second argument to get
2203    * "Susan &amp; Fred ate 5 apples".
2204    */
2205
2206   /* Create the two modified format strings
2207    */
2208   format1 = g_string_new (NULL);
2209   format2 = g_string_new (NULL);
2210   p = format;
2211   while (TRUE)
2212     {
2213       const char *after;
2214       const char *conv = find_conversion (p, &after);
2215       if (!conv)
2216         break;
2217
2218       g_string_append_len (format1, conv, after - conv);
2219       g_string_append_c (format1, 'X');
2220       g_string_append_len (format2, conv, after - conv);
2221       g_string_append_c (format2, 'Y');
2222
2223       p = after;
2224     }
2225
2226   /* Use them to format the arguments
2227    */
2228   G_VA_COPY (args2, args);
2229
2230   output1 = g_strdup_vprintf (format1->str, args);
2231   if (!output1)
2232     {
2233       va_end (args2);
2234       goto cleanup;
2235     }
2236
2237   output2 = g_strdup_vprintf (format2->str, args2);
2238   va_end (args2);
2239   if (!output2)
2240     goto cleanup;
2241
2242   result = g_string_new (NULL);
2243
2244   /* Iterate through the original format string again,
2245    * copying the non-conversion portions and the escaped
2246    * converted arguments to the output string.
2247    */
2248   op1 = output1;
2249   op2 = output2;
2250   p = format;
2251   while (TRUE)
2252     {
2253       const char *after;
2254       const char *output_start;
2255       const char *conv = find_conversion (p, &after);
2256       char *escaped;
2257
2258       if (!conv)        /* The end, after points to the trailing \0 */
2259         {
2260           g_string_append_len (result, p, after - p);
2261           break;
2262         }
2263
2264       g_string_append_len (result, p, conv - p);
2265       output_start = op1;
2266       while (*op1 == *op2)
2267         {
2268           op1++;
2269           op2++;
2270         }
2271
2272       escaped = g_markup_escape_text (output_start, op1 - output_start);
2273       g_string_append (result, escaped);
2274       g_free (escaped);
2275
2276       p = after;
2277       op1++;
2278       op2++;
2279     }
2280
2281  cleanup:
2282   g_string_free (format1, TRUE);
2283   g_string_free (format2, TRUE);
2284   g_free (output1);
2285   g_free (output2);
2286
2287   if (result)
2288     return g_string_free (result, FALSE);
2289   else
2290     return NULL;
2291 }
2292
/**
 * g_markup_printf_escaped:
 * @format: printf() style format string
 * @Varargs: the arguments to insert in the format string
 *
 * Formats arguments according to @format, escaping
 * all string and character arguments in the fashion
 * of g_markup_escape_text(). This is useful when you
 * want to insert literal strings into XML-style markup
 * output, without having to worry that the strings
 * might themselves contain markup.
 *
 * |[
 * const char *store = "Fortnum &amp; Mason";
 * const char *item = "Tea";
 * char *output;
 * &nbsp;
 * output = g_markup_printf_escaped ("&lt;purchase&gt;"
 *                                   "&lt;store&gt;&percnt;s&lt;/store&gt;"
 *                                   "&lt;item&gt;&percnt;s&lt;/item&gt;"
 *                                   "&lt;/purchase&gt;",
 *                                   store, item);
 * ]|
 *
 * Return value: newly allocated result from formatting
 *  operation. Free with g_free().
 *
 * Since: 2.4
 **/
char *
g_markup_printf_escaped (const char *format, ...)
{
  va_list ap;
  char *escaped;

  /* Thin varargs front end: all the work is done by the va_list variant. */
  va_start (ap, format);
  escaped = g_markup_vprintf_escaped (format, ap);
  va_end (ap);

  return escaped;
}
2334
2335 static gboolean
2336 g_markup_parse_boolean (const char  *string,
2337                         gboolean    *value)
2338 {
2339   char const * const falses[] = { "false", "f", "no", "n", "0" };
2340   char const * const trues[] = { "true", "t", "yes", "y", "1" };
2341   int i;
2342
2343   for (i = 0; i < G_N_ELEMENTS (falses); i++)
2344     {
2345       if (g_ascii_strcasecmp (string, falses[i]) == 0)
2346         {
2347           if (value != NULL)
2348             *value = FALSE;
2349
2350           return TRUE;
2351         }
2352     }
2353
2354   for (i = 0; i < G_N_ELEMENTS (trues); i++)
2355     {
2356       if (g_ascii_strcasecmp (string, trues[i]) == 0)
2357         {
2358           if (value != NULL)
2359             *value = TRUE;
2360
2361           return TRUE;
2362         }
2363     }
2364
2365   return FALSE;
2366 }
2367
2368 /**
2369  * GMarkupCollectType:
2370  * @G_MARKUP_COLLECT_INVALID: used to terminate the list of attributes
2371  *                            to collect.
2372  * @G_MARKUP_COLLECT_STRING: collect the string pointer directly from
2373  *                           the attribute_values[] array.  Expects a
2374  *                           parameter of type (const char **).  If
2375  *                           %G_MARKUP_COLLECT_OPTIONAL is specified
2376  *                           and the attribute isn't present then the
2377  *                           pointer will be set to %NULL.
2378  * @G_MARKUP_COLLECT_STRDUP: as with %G_MARKUP_COLLECT_STRING, but
 *                           expects a parameter of type (char **) and
2380  *                           g_strdup()s the returned pointer.  The
2381  *                           pointer must be freed with g_free().
2382  * @G_MARKUP_COLLECT_BOOLEAN: expects a parameter of type (gboolean *)
2383  *                            and parses the attribute value as a
2384  *                            boolean.  Sets %FALSE if the attribute
2385  *                            isn't present.  Valid boolean values
2386  *                            consist of (case insensitive) "false",
2387  *                            "f", "no", "n", "0" and "true", "t",
2388  *                            "yes", "y", "1".
2389  * @G_MARKUP_COLLECT_TRISTATE: as with %G_MARKUP_COLLECT_BOOLEAN, but
2390  *                             in the case of a missing attribute a
2391  *                             value is set that compares equal to
2392  *                             neither %FALSE nor %TRUE.
2393  *                             G_MARKUP_COLLECT_OPTIONAL is implied.
2394  * @G_MARKUP_COLLECT_OPTIONAL: can be bitwise ORed with the other
2395  *                             fields.  If present, allows the
2396  *                             attribute not to appear.  A default
2397  *                             value is set depending on what value
2398  *                             type is used.
2399  *
2400  * A mixed enumerated type and flags field.  You must specify one type
2401  * (string, strdup, boolean, tristate).  Additionally, you may
2402  * optionally bitwise OR the type with the flag
2403  * %G_MARKUP_COLLECT_OPTIONAL.
2404  *
2405  * It is likely that this enum will be extended in the future to
2406  * support other types.
2407  **/
2408
2409 /**
2410  * g_markup_collect_attributes:
2411  * @element_name: the current tag name
2412  * @attribute_names: the attribute names
2413  * @attribute_values: the attribute values
2414  * @error: a pointer to a #GError or %NULL
2415  * @first_type: the #GMarkupCollectType of the
2416  *              first attribute
2417  * @first_attr: the name of the first attribute
2418  * @...: a pointer to the storage location of the
2419  *       first attribute (or %NULL), followed by
2420  *       more types names and pointers, ending
2421  *       with %G_MARKUP_COLLECT_INVALID.
2422  *
2423  * Collects the attributes of the element from the
2424  * data passed to the #GMarkupParser start_element
2425  * function, dealing with common error conditions
2426  * and supporting boolean values.
2427  *
2428  * This utility function is not required to write
2429  * a parser but can save a lot of typing.
2430  *
2431  * The @element_name, @attribute_names,
2432  * @attribute_values and @error parameters passed
2433  * to the start_element callback should be passed
2434  * unmodified to this function.
2435  *
2436  * Following these arguments is a list of
2437  * "supported" attributes to collect.  It is an
2438  * error to specify multiple attributes with the
2439  * same name.  If any attribute not in the list
2440  * appears in the @attribute_names array then an
2441  * unknown attribute error will result.
2442  *
2443  * The #GMarkupCollectType field allows specifying
2444  * the type of collection to perform and if a
2445  * given attribute must appear or is optional.
2446  *
2447  * The attribute name is simply the name of the
2448  * attribute to collect.
2449  *
2450  * The pointer should be of the appropriate type
2451  * (see the descriptions under
2452  * #GMarkupCollectType) and may be %NULL in case a
2453  * particular attribute is to be allowed but
2454  * ignored.
2455  *
2456  * This function deals with issuing errors for missing attributes
2457  * (of type %G_MARKUP_ERROR_MISSING_ATTRIBUTE), unknown attributes
2458  * (of type %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE) and duplicate
2459  * attributes (of type %G_MARKUP_ERROR_INVALID_CONTENT) as well
2460  * as parse errors for boolean-valued attributes (again of type
2461  * %G_MARKUP_ERROR_INVALID_CONTENT). In all of these cases %FALSE
2462  * will be returned and @error will be set as appropriate.
2463  *
2464  * Return value: %TRUE if successful
2465  *
2466  * Since: 2.16
2467  **/
2468 gboolean
2469 g_markup_collect_attributes (const gchar         *element_name,
2470                              const gchar        **attribute_names,
2471                              const gchar        **attribute_values,
2472                              GError             **error,
2473                              GMarkupCollectType   first_type,
2474                              const gchar         *first_attr,
2475                              ...)
2476 {
2477   GMarkupCollectType type;
2478   const gchar *attr;
2479   guint64 collected;
2480   int written;
2481   va_list ap;
2482   int i;
2483
2484   type = first_type;
2485   attr = first_attr;
2486   collected = 0;
2487   written = 0;
2488
2489   va_start (ap, first_attr);
2490   while (type != G_MARKUP_COLLECT_INVALID)
2491     {
2492       gboolean mandatory;
2493       const gchar *value;
2494
2495       mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL);
2496       type &= (G_MARKUP_COLLECT_OPTIONAL - 1);
2497
2498       /* tristate records a value != TRUE and != FALSE
2499        * for the case where the attribute is missing
2500        */
2501       if (type == G_MARKUP_COLLECT_TRISTATE)
2502         mandatory = FALSE;
2503
2504       for (i = 0; attribute_names[i]; i++)
2505         if (i >= 40 || !(collected & (G_GUINT64_CONSTANT(1) << i)))
2506           if (!strcmp (attribute_names[i], attr))
2507             break;
2508
2509       /* ISO C99 only promises that the user can pass up to 127 arguments.
2510        * Subtracting the first 4 arguments plus the final NULL and dividing
2511        * by 3 arguments per collected attribute, we are left with a maximum
2512        * number of supported attributes of (127 - 5) / 3 = 40.
2513        *
2514        * In reality, nobody is ever going to call us with anywhere close to
2515        * 40 attributes to collect, so it is safe to assume that if i > 40
2516        * then the user has given some invalid or repeated arguments.  These
2517        * problems will be caught and reported at the end of the function.
2518        *
2519        * We know at this point that we have an error, but we don't know
2520        * what error it is, so just continue...
2521        */
2522       if (i < 40)
2523         collected |= (G_GUINT64_CONSTANT(1) << i);
2524
2525       value = attribute_values[i];
2526
2527       if (value == NULL && mandatory)
2528         {
2529           g_set_error (error, G_MARKUP_ERROR,
2530                        G_MARKUP_ERROR_MISSING_ATTRIBUTE,
2531                        "element '%s' requires attribute '%s'",
2532                        element_name, attr);
2533
2534           va_end (ap);
2535           goto failure;
2536         }
2537
2538       switch (type)
2539         {
2540         case G_MARKUP_COLLECT_STRING:
2541           {
2542             const char **str_ptr;
2543
2544             str_ptr = va_arg (ap, const char **);
2545
2546             if (str_ptr != NULL)
2547               *str_ptr = value;
2548           }
2549           break;
2550
2551         case G_MARKUP_COLLECT_STRDUP:
2552           {
2553             char **str_ptr;
2554
2555             str_ptr = va_arg (ap, char **);
2556
2557             if (str_ptr != NULL)
2558               *str_ptr = g_strdup (value);
2559           }
2560           break;
2561
2562         case G_MARKUP_COLLECT_BOOLEAN:
2563         case G_MARKUP_COLLECT_TRISTATE:
2564           if (value == NULL)
2565             {
2566               gboolean *bool_ptr;
2567
2568               bool_ptr = va_arg (ap, gboolean *);
2569
2570               if (bool_ptr != NULL)
2571                 {
2572                   if (type == G_MARKUP_COLLECT_TRISTATE)
2573                     /* constructivists rejoice!
2574                      * neither false nor true...
2575                      */
2576                     *bool_ptr = -1;
2577
2578                   else /* G_MARKUP_COLLECT_BOOLEAN */
2579                     *bool_ptr = FALSE;
2580                 }
2581             }
2582           else
2583             {
2584               if (!g_markup_parse_boolean (value, va_arg (ap, gboolean *)))
2585                 {
2586                   g_set_error (error, G_MARKUP_ERROR,
2587                                G_MARKUP_ERROR_INVALID_CONTENT,
2588                                "element '%s', attribute '%s', value '%s' "
2589                                "cannot be parsed as a boolean value",
2590                                element_name, attr, value);
2591
2592                   va_end (ap);
2593                   goto failure;
2594                 }
2595             }
2596
2597           break;
2598
2599         default:
2600           g_assert_not_reached ();
2601         }
2602
2603       type = va_arg (ap, GMarkupCollectType);
2604       attr = va_arg (ap, const char *);
2605       written++;
2606     }
2607   va_end (ap);
2608
2609   /* ensure we collected all the arguments */
2610   for (i = 0; attribute_names[i]; i++)
2611     if ((collected & (G_GUINT64_CONSTANT(1) << i)) == 0)
2612       {
2613         /* attribute not collected:  could be caused by two things.
2614          *
2615          * 1) it doesn't exist in our list of attributes
2616          * 2) it existed but was matched by a duplicate attribute earlier
2617          *
2618          * find out.
2619          */
2620         int j;
2621
2622         for (j = 0; j < i; j++)
2623           if (strcmp (attribute_names[i], attribute_names[j]) == 0)
2624             /* duplicate! */
2625             break;
2626
2627         /* j is now the first occurance of attribute_names[i] */
2628         if (i == j)
2629           g_set_error (error, G_MARKUP_ERROR,
2630                        G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
2631                        "attribute '%s' invalid for element '%s'",
2632                        attribute_names[i], element_name);
2633         else
2634           g_set_error (error, G_MARKUP_ERROR,
2635                        G_MARKUP_ERROR_INVALID_CONTENT,
2636                        "attribute '%s' given multiple times for element '%s'",
2637                        attribute_names[i], element_name);
2638
2639         goto failure;
2640       }
2641
2642   return TRUE;
2643
2644 failure:
2645   /* replay the above to free allocations */
2646   type = first_type;
2647   attr = first_attr;
2648
2649   va_start (ap, first_attr);
2650   while (type != G_MARKUP_COLLECT_INVALID)
2651     {
2652       gpointer ptr;
2653
2654       ptr = va_arg (ap, gpointer);
2655
2656       if (ptr == NULL)
2657         continue;
2658
2659       switch (type & (G_MARKUP_COLLECT_OPTIONAL - 1))
2660         {
2661         case G_MARKUP_COLLECT_STRDUP:
2662           if (written)
2663             g_free (*(char **) ptr);
2664
2665         case G_MARKUP_COLLECT_STRING:
2666           *(char **) ptr = NULL;
2667           break;
2668
2669         case G_MARKUP_COLLECT_BOOLEAN:
2670           *(gboolean *) ptr = FALSE;
2671           break;
2672
2673         case G_MARKUP_COLLECT_TRISTATE:
2674           *(gboolean *) ptr = -1;
2675           break;
2676         }
2677
2678       type = va_arg (ap, GMarkupCollectType);
2679       attr = va_arg (ap, const char *);
2680
2681       if (written)
2682         written--;
2683     }
2684   va_end (ap);
2685
2686   return FALSE;
2687 }
2688
2689 #define __G_MARKUP_C__
2690 #include "galiasdef.c"