gmarkup.c

   1 /* gmarkup.c - Simple XML-like parser
   2  *
   3  *  Copyright 2000 Red Hat, Inc.
   4  *
   5  * GLib is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU Lesser General Public License as
   7  * published by the Free Software Foundation; either version 2 of the
   8  * License, or (at your option) any later version.
   9  *
  10  * GLib is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with GLib; see the file COPYING.LIB.  If not,
  17  * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18  *   Boston, MA 02111-1307, USA.
  19  */
  20
  21 #include "glib.h"
  22
  23 #include <string.h>
  24 #include <stdio.h>
  25 #include <stdlib.h>
  26 #include <errno.h>
  27
  28 #include "glibintl.h"
  29
  30 GQuark
  31 g_markup_error_quark ()
  32 {
  33   static GQuark error_quark = 0;
  34
  35   if (error_quark == 0)
  36     error_quark = g_quark_from_static_string ("g-markup-error-quark");
  37
  38   return error_quark;
  39 }
  40
  41 typedef struct _GMarkupAttribute GMarkupAttribute;
  42
  43 struct _GMarkupAttribute
  44 {
  45   gchar *name;
  46   gchar *value;
  47 };
  48
  49 static GMarkupAttribute*
  50 attribute_new (const gchar *name, const gchar *value)
  51 {
  52   GMarkupAttribute *attr;
  53
  54   attr = g_new (GMarkupAttribute, 1);
  55
  56   /* name/value are allowed to be NULL */
  57   attr->name = g_strdup (name);
  58   attr->value = g_strdup (value);
  59
  60   return attr;
  61 }
  62
  63 static void
  64 attribute_free (GMarkupAttribute *attr)
  65 {
  66   g_free (attr->name);
  67   g_free (attr->value);
  68   g_free (attr);
  69 }
  70
/* States of the markup parser's state machine, stored in
 * GMarkupParseContext.state. Each state names the position of the
 * parser relative to the markup it has consumed so far.
 */
typedef enum
{
  /* Initial state of a new context: nothing but whitespace seen yet;
   * a '<' moves to STATE_AFTER_OPEN_ANGLE.
   */
  STATE_START,
  /* Just consumed '<'; the next character selects a passthrough
   * ('?' or '!'), a close tag ('/') or an open tag name.
   */
  STATE_AFTER_OPEN_ANGLE,
  STATE_AFTER_CLOSE_ANGLE,
  STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
  STATE_INSIDE_OPEN_TAG_NAME,
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  STATE_INSIDE_ATTRIBUTE_VALUE,
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH,
  STATE_INSIDE_CLOSE_TAG_NAME,
  /* Entered after "<?" or "<!"; the leading '<' has already been
   * copied into the partial chunk.
   */
  STATE_INSIDE_PASSTHROUGH,
  /* Set by mark_error(); g_markup_parse_context_parse() refuses any
   * further input once the context is in this state.
   */
  STATE_ERROR
} GMarkupParseState;
  88
/* All state of one incremental parse. Created by
 * g_markup_parse_context_new() and released by
 * g_markup_parse_context_free().
 */
struct _GMarkupParseContext
{
  /* Callback table supplied by the caller; not copied, so it must
   * outlive the context.
   */
  const GMarkupParser *parser;

  GMarkupParseFlags flags;

  /* Position used in error messages; both start at 1 and are
   * maintained by advance_char().
   */
  gint line_number;
  gint char_number;

  /* Passed to the parser callbacks; dnotify (if non-NULL) is called
   * on user_data when the context is freed.
   */
  gpointer user_data;
  GDestroyNotify dnotify;

  /* A piece of character data or an element that
   * hasn't "ended" yet so we haven't yet called
   * the callback for it.
   */
  GString *partial_chunk;

  GMarkupParseState state;
  /* List of allocated strings (freed with g_free()); the head is the
   * element returned by current_element().
   */
  GSList *tag_stack;
  /* List of GMarkupAttribute, newest first, i.e. in reverse document
   * order; attribute_list_to_arrays() restores document order.
   */
  GSList *attributes;

  /* The chunk handed to g_markup_parse_context_parse().
   * current_text_end stops before any incomplete trailing UTF-8
   * character, and current_text_len is shortened to match.
   */
  const gchar *current_text;
  gint         current_text_len;
  const gchar *current_text_end;

  /* Bytes of an incomplete UTF-8 character cut off at the end of the
   * previous chunk, kept until the next chunk completes it; NULL when
   * there is none.
   */
  GString *leftover_char_portion;

  /* used to save the start of the last interesting thingy */
  const gchar *start;

  /* Current read position inside current_text. */
  const gchar *iter;

  /* TRUE until the first '<' of the document has been seen. */
  guint document_empty : 1;
  /* TRUE while g_markup_parse_context_parse() is running; blocks
   * re-entrant parsing and freeing the context during a parse.
   */
  guint parsing : 1;
};
 125
 126 /**
 127  * g_markup_parse_context_new:
 128  * @parser: a #GMarkupParser
 129  * @flags: one or more #GMarkupParseFlags
 130  * @user_data: user data to pass to #GMarkupParser functions
 131  * @user_data_dnotify: user data destroy notifier called when the parse context is freed
 132  *
 133  * Creates a new parse context. A parse context is used to parse
 134  * marked-up documents. You can feed any number of documents into
 135  * a context, as long as no errors occur; once an error occurs,
 136  * the parse context can't continue to parse text (you have to free it
 137  * and create a new parse context).
 138  *
 139  * Return value: a new #GMarkupParseContext
 140  **/
 141 GMarkupParseContext *
 142 g_markup_parse_context_new (const GMarkupParser *parser,
 143                             GMarkupParseFlags    flags,
 144                             gpointer             user_data,
 145                             GDestroyNotify       user_data_dnotify)
 146 {
 147   GMarkupParseContext *context;
 148
 149   g_return_val_if_fail (parser != NULL, NULL);
 150
 151   context = g_new (GMarkupParseContext, 1);
 152
 153   context->parser = parser;
 154   context->flags = flags;
 155   context->user_data = user_data;
 156   context->dnotify = user_data_dnotify;
 157
 158   context->line_number = 1;
 159   context->char_number = 1;
 160
 161   context->partial_chunk = NULL;
 162
 163   context->state = STATE_START;
 164   context->tag_stack = NULL;
 165   context->attributes = NULL;
 166
 167   context->current_text = NULL;
 168   context->current_text_len = -1;
 169   context->current_text_end = NULL;
 170   context->leftover_char_portion = NULL;
 171
 172   context->start = NULL;
 173   context->iter = NULL;
 174
 175   context->document_empty = TRUE;
 176   context->parsing = FALSE;
 177
 178   return context;
 179 }
 180
 181 /**
 182  * g_markup_parse_context_free:
 183  * @context: a #GMarkupParseContext
 184  *
 185  * Frees a #GMarkupParseContext. Can't be called from inside
 186  * one of the #GMarkupParser functions.
 187  *
 188  **/
 189 void
 190 g_markup_parse_context_free (GMarkupParseContext *context)
 191 {
 192   g_return_if_fail (context != NULL);
 193   g_return_if_fail (!context->parsing);
 194
 195   if (context->dnotify)
 196     (* context->dnotify) (context->user_data);
 197
 198   g_slist_foreach (context->attributes, (GFunc)attribute_free, NULL);
 199   g_slist_free (context->attributes);
 200
 201   g_slist_foreach (context->tag_stack, (GFunc)g_free, NULL);
 202   g_slist_free (context->tag_stack);
 203
 204   if (context->partial_chunk)
 205     g_string_free (context->partial_chunk, TRUE);
 206
 207   if (context->leftover_char_portion)
 208     g_string_free (context->leftover_char_portion, TRUE);
 209
 210   g_free (context);
 211 }
 212
 213 static void
 214 attribute_list_to_arrays (GSList  *attributes,
 215                           gchar ***namesp,
 216                           gchar ***valuesp,
 217                           gint    *n_attributes)
 218 {
 219   GSList *tmp_list;
 220   gint len;
 221   gchar **names;
 222   gchar **values;
 223   gint i;
 224
 225   len = g_slist_length (attributes);
 226
 227   if (namesp)
 228     {
 229       names = g_new (gchar*, len + 1);
 230       names[len] = NULL;
 231     }
 232   else
 233     names = NULL;
 234
 235   if (valuesp)
 236     {
 237       values = g_new (gchar*, len + 1);
 238       values[len] = NULL;
 239     }
 240   else
 241     values = NULL;
 242
 243   /* We want to reverse the list, since it's
 244    * backward from the order the attributes appeared
 245    * in the file.
 246    */
 247   i = len - 1;
 248   tmp_list = attributes;
 249   while (tmp_list)
 250     {
 251       GMarkupAttribute *attr = tmp_list->data;
 252
 253       g_assert (i >= 0);
 254
 255       if (namesp)
 256         names[i] = g_strdup (attr->name);
 257
 258       if (valuesp)
 259         values[i] = g_strdup (attr->value);
 260
 261       tmp_list = g_slist_next (tmp_list);
 262       --i;
 263     }
 264
 265   if (n_attributes)
 266     *n_attributes = len;
 267
 268   if (namesp)
 269     *namesp = names;
 270
 271   if (valuesp)
 272     *valuesp = values;
 273 }
 274
 275 static void
 276 mark_error (GMarkupParseContext *context,
 277             GError              *error)
 278 {
 279   context->state = STATE_ERROR;
 280
 281   if (context->parser->error)
 282     (*context->parser->error) (context, error, context->user_data);
 283 }
 284
 285 static void
 286 set_error (GMarkupParseContext *context,
 287            GError             **error,
 288            GMarkupError         code,
 289            const gchar         *format,
 290            ...)
 291 {
 292   GError *tmp_error;
 293   gchar *s;
 294   va_list args;
 295
 296   va_start (args, format);
 297   s = g_strdup_vprintf (format, args);
 298   va_end (args);
 299
 300   tmp_error = g_error_new (G_MARKUP_ERROR,
 301                            code,
 302                            _("Error on line %d char %d: %s"),
 303                            context->line_number,
 304                            context->char_number,
 305                            s);
 306
 307   g_free (s);
 308
 309   mark_error (context, tmp_error);
 310
 311   g_propagate_error (error, tmp_error);
 312 }
 313
 314 static gboolean
 315 is_name_start_char (gunichar c)
 316 {
 317   if (g_unichar_isalpha (c) ||
 318       c == '_' ||
 319       c == ':')
 320     return TRUE;
 321   else
 322     return FALSE;
 323 }
 324
 325 static gboolean
 326 is_name_char (gunichar c)
 327 {
 328   if (g_unichar_isalnum (c) ||
 329       c == '.' ||
 330       c == '-' ||
 331       c == '_' ||
 332       c == ':')
 333     return TRUE;
 334   else
 335     return FALSE;
 336 }
 337
 338
 339 static gchar*
 340 char_str (gunichar c,
 341           gchar   *buf)
 342 {
 343   memset (buf, 0, 7);
 344   g_unichar_to_utf8 (c, buf);
 345   return buf;
 346 }
 347
 348 static gchar*
 349 utf8_str (const gchar *utf8,
 350           gchar       *buf)
 351 {
 352   char_str (g_utf8_get_char (utf8), buf);
 353   return buf;
 354 }
 355
 356 static void
 357 set_unescape_error (GMarkupParseContext *context,
 358                     GError             **error,
 359                     const gchar         *remaining_text,
 360                     const gchar         *remaining_text_end,
 361                     GMarkupError         code,
 362                     const gchar         *format,
 363                     ...)
 364 {
 365   GError *tmp_error;
 366   gchar *s;
 367   va_list args;
 368   gint remaining_newlines;
 369   const gchar *p;
 370
 371   remaining_newlines = 0;
 372   p = remaining_text;
 373   while (p != remaining_text_end)
 374     {
 375       if (*p == '\n')
 376         ++remaining_newlines;
 377       ++p;
 378     }
 379
 380   va_start (args, format);
 381   s = g_strdup_vprintf (format, args);
 382   va_end (args);
 383
 384   tmp_error = g_error_new (G_MARKUP_ERROR,
 385                            code,
 386                            _("Error on line %d: %s"),
 387                            context->line_number - remaining_newlines,
 388                            s);
 389
 390   g_free (s);
 391
 392   mark_error (context, tmp_error);
 393
 394   g_propagate_error (error, tmp_error);
 395 }
 396
/* States of the small state machine unescape_text() uses to expand
 * entity and character references.
 */
typedef enum
{
  USTATE_INSIDE_TEXT,         /* copying literal text up to the next '&' */
  USTATE_AFTER_AMPERSAND,     /* just consumed '&' */
  USTATE_INSIDE_ENTITY_NAME,  /* reading a name such as "amp" up to ';' */
  USTATE_AFTER_CHARREF_HASH   /* just consumed "&#"; digits up to ';' follow */
} UnescapeState;
 404
 405 static gboolean
 406 unescape_text (GMarkupParseContext *context,
 407                const gchar         *text,
 408                const gchar         *text_end,
 409                gchar              **unescaped,
 410                GError             **error)
 411 {
 412 #define MAX_ENT_LEN 5
 413   GString *str;
 414   const gchar *p;
 415   UnescapeState state;
 416   const gchar *start;
 417
 418   str = g_string_new ("");
 419
 420   state = USTATE_INSIDE_TEXT;
 421   p = text;
 422   start = p;
 423   while (p != text_end && context->state != STATE_ERROR)
 424     {
 425       g_assert (p < text_end);
 426
 427       switch (state)
 428         {
 429         case USTATE_INSIDE_TEXT:
 430           {
 431             while (p != text_end && *p != '&')
 432               p = g_utf8_next_char (p);
 433
 434             if (p != start)
 435               {
 436                 g_string_append_len (str, start, p - start);
 437
 438                 start = NULL;
 439               }
 440
 441             if (p != text_end && *p == '&')
 442               {
 443                 p = g_utf8_next_char (p);
 444                 state = USTATE_AFTER_AMPERSAND;
 445               }
 446           }
 447           break;
 448
 449         case USTATE_AFTER_AMPERSAND:
 450           {
 451             if (*p == '#')
 452               {
 453                 p = g_utf8_next_char (p);
 454
 455                 start = p;
 456                 state = USTATE_AFTER_CHARREF_HASH;
 457               }
 458             else if (!is_name_start_char (g_utf8_get_char (p)))
 459               {
 460                 if (*p == ';')
 461                   {
 462                     set_unescape_error (context, error,
 463                                         p, text_end,
 464                                         G_MARKUP_ERROR_PARSE,
 465                                         _("Empty entity '&;' seen; valid "
 466                                           "entities are: &amp; &quot; &lt; &gt; &apos;"));
 467                   }
 468                 else
 469                   {
 470                     gchar buf[7];
 471
 472                     set_unescape_error (context, error,
 473                                         p, text_end,
 474                                         G_MARKUP_ERROR_PARSE,
 475                                         _("Character '%s' is not valid at "
 476                                           "the start of an entity name; "
 477                                           "the & character begins an entity; "
 478                                           "if this ampersand isn't supposed "
 479                                           "to be an entity, escape it as "
 480                                           "&amp;"),
 481                                         utf8_str (p, buf));
 482                   }
 483               }
 484             else
 485               {
 486                 start = p;
 487                 state = USTATE_INSIDE_ENTITY_NAME;
 488               }
 489           }
 490           break;
 491
 492
 493         case USTATE_INSIDE_ENTITY_NAME:
 494           {
 495             gchar buf[MAX_ENT_LEN+1] = {
 496               '\0', '\0', '\0', '\0', '\0', '\0'
 497             };
 498             gchar *dest;
 499
 500             while (p != text_end)
 501               {
 502                 if (*p == ';')
 503                   break;
 504                 else if (!is_name_char (*p))
 505                   {
 506                     gchar ubuf[7];
 507
 508                     set_unescape_error (context, error,
 509                                         p, text_end,
 510                                         G_MARKUP_ERROR_PARSE,
 511                                         _("Character '%s' is not valid "
 512                                           "inside an entity name"),
 513                                         utf8_str (p, ubuf));
 514                     break;
 515                   }
 516
 517                 p = g_utf8_next_char (p);
 518               }
 519
 520             if (context->state != STATE_ERROR)
 521               {
 522                 if (p != text_end)
 523                   {
 524                     const gchar *src;
 525
 526                     src = start;
 527                     dest = buf;
 528                     while (src != p)
 529                       {
 530                         *dest = *src;
 531                         ++dest;
 532                         ++src;
 533                       }
 534
 535                     /* move to after semicolon */
 536                     p = g_utf8_next_char (p);
 537                     start = p;
 538                     state = USTATE_INSIDE_TEXT;
 539
 540                     if (strcmp (buf, "lt") == 0)
 541                       g_string_append_c (str, '<');
 542                     else if (strcmp (buf, "gt") == 0)
 543                       g_string_append_c (str, '>');
 544                     else if (strcmp (buf, "amp") == 0)
 545                       g_string_append_c (str, '&');
 546                     else if (strcmp (buf, "quot") == 0)
 547                       g_string_append_c (str, '"');
 548                     else if (strcmp (buf, "apos") == 0)
 549                       g_string_append_c (str, '\'');
 550                     else
 551                       {
 552                         set_unescape_error (context, error,
 553                                             p, text_end,
 554                                             G_MARKUP_ERROR_PARSE,
 555                                             _("Entity name '%s' is not known"),
 556                                             buf);
 557                       }
 558                   }
 559                 else
 560                   {
 561                     set_unescape_error (context, error,
 562                                         /* give line number of the & */
 563                                         start, text_end,
 564                                         G_MARKUP_ERROR_PARSE,
 565                                         _("Entity did not end with a semicolon; "
 566                                           "most likely you used an ampersand "
 567                                           "character without intending to start "
 568                                           "an entity - escape ampersand as &amp;"));
 569                   }
 570               }
 571           }
 572           break;
 573
 574         case USTATE_AFTER_CHARREF_HASH:
 575           {
 576             gboolean is_hex = FALSE;
 577             if (*p == 'x')
 578               {
 579                 is_hex = TRUE;
 580                 p = g_utf8_next_char (p);
 581                 start = p;
 582               }
 583
 584             while (p != text_end && *p != ';')
 585               p = g_utf8_next_char (p);
 586
 587             if (p != text_end)
 588               {
 589                 g_assert (*p == ';');
 590
 591                 /* digit is between start and p */
 592
 593                 if (start != p)
 594                   {
 595                     gchar *digit = g_strndup (start, p - start);
 596                     gulong l;
 597                     gchar *end = NULL;
 598                     gchar *digit_end = digit + (p - start);
 599
 600                     errno = 0;
 601                     if (is_hex)
 602                       l = strtoul (digit, &end, 16);
 603                     else
 604                       l = strtoul (digit, &end, 10);
 605
 606                     if (end != digit_end || errno != 0)
 607                       {
 608                         set_unescape_error (context, error,
 609                                             start, text_end,
 610                                             G_MARKUP_ERROR_PARSE,
 611                                             _("Failed to parse '%s', which "
 612                                               "should have been a digit "
 613                                               "inside a character reference "
 614                                               "(&#234; for example) - perhaps "
 615                                               "the digit is too large"),
 616                                             digit);
 617                       }
 618                     else
 619                       {
 620                         /* characters XML permits */
 621                         if (l == 0x9 ||
 622                             l == 0xA ||
 623                             l == 0xD ||
 624                             (l >= 0x20 && l <= 0xD7FF) ||
 625                             (l >= 0xE000 && l <= 0xFFFD) ||
 626                             (l >= 0x10000 && l <= 0x10FFFF))
 627                           {
 628                             gchar buf[7];
 629                             g_string_append (str,
 630                                              char_str (l, buf));
 631                           }
 632                         else
 633                           {
 634                             set_unescape_error (context, error,
 635                                                 start, text_end,
 636                                                 G_MARKUP_ERROR_PARSE,
 637                                                 _("Character reference '%s' does not encode a permitted character"),
 638                                                 digit);
 639                           }
 640                       }
 641
 642                     g_free (digit);
 643
 644                     /* Move to next state */
 645                     p = g_utf8_next_char (p); /* past semicolon */
 646                     start = p;
 647                     state = USTATE_INSIDE_TEXT;
 648                   }
 649                 else
 650                   {
 651                     set_unescape_error (context, error,
 652                                         start, text_end,
 653                                         G_MARKUP_ERROR_PARSE,
 654                                         _("Empty character reference; "
 655                                           "should include a digit such as "
 656                                           "&#454;"));
 657                   }
 658               }
 659             else
 660               {
 661                 set_unescape_error (context, error,
 662                                     start, text_end,
 663                                     G_MARKUP_ERROR_PARSE,
 664                                     _("Character reference did not end with a "
 665                                       "semicolon; "
 666                                       "most likely you used an ampersand "
 667                                       "character without intending to start "
 668                                       "an entity - escape ampersand as &amp;"));
 669               }
 670           }
 671           break;
 672
 673         default:
 674           g_assert_not_reached ();
 675           break;
 676         }
 677     }
 678
 679   /* If no errors, we should have returned to USTATE_INSIDE_TEXT */
 680   g_assert (context->state == STATE_ERROR ||
 681             state == USTATE_INSIDE_TEXT);
 682
 683   if (context->state == STATE_ERROR)
 684     {
 685       g_string_free (str, TRUE);
 686       *unescaped = NULL;
 687       return FALSE;
 688     }
 689   else
 690     {
 691       *unescaped = g_string_free (str, FALSE);
 692       return TRUE;
 693     }
 694
 695 #undef MAX_ENT_LEN
 696 }
 697
 698 static gboolean
 699 advance_char (GMarkupParseContext *context)
 700 {
 701
 702   context->iter = g_utf8_next_char (context->iter);
 703   context->char_number += 1;
 704   if (*context->iter == '\n')
 705     {
 706       context->line_number += 1;
 707       context->char_number = 1;
 708     }
 709
 710   return context->iter != context->current_text_end;
 711 }
 712
 713 static void
 714 skip_spaces (GMarkupParseContext *context)
 715 {
 716   do
 717     {
 718       if (!g_unichar_isspace (g_utf8_get_char (context->iter)))
 719         return;
 720     }
 721   while (advance_char (context));
 722 }
 723
 724 static void
 725 advance_to_name_end (GMarkupParseContext *context)
 726 {
 727   do
 728     {
 729       if (!is_name_char (g_utf8_get_char (context->iter)))
 730         return;
 731     }
 732   while (advance_char (context));
 733 }
 734
 735 static void
 736 add_to_partial (GMarkupParseContext *context,
 737                 const gchar         *text_start,
 738                 const gchar         *text_end)
 739 {
 740   if (context->partial_chunk == NULL)
 741     context->partial_chunk = g_string_new ("");
 742
 743   if (text_start != text_end)
 744     g_string_append_len (context->partial_chunk, text_start,
 745                          text_end - text_start);
 746
 747   /* Invariant here that partial_chunk exists */
 748 }
 749
 750 static void
 751 free_partial (GMarkupParseContext *context)
 752 {
 753   if (context->partial_chunk != NULL)
 754     {
 755       g_string_free (context->partial_chunk, TRUE);
 756       context->partial_chunk = NULL;
 757     }
 758 }
 759
 760 static const gchar*
 761 current_element (GMarkupParseContext *context)
 762 {
 763   return context->tag_stack->data;
 764 }
 765
 766 static const gchar*
 767 current_attribute (GMarkupParseContext *context)
 768 {
 769   return ((GMarkupAttribute*)context->attributes->data)->name;
 770 }
 771
 772 static void
 773 find_current_text_end (GMarkupParseContext *context)
 774 {
 775   /* This function must be safe (non-segfaulting) on invalid UTF8 */
 776   const gchar *end = context->current_text + context->current_text_len;
 777   const gchar *p;
 778   const gchar *next;
 779
 780   g_assert (context->current_text_len > 0);
 781
 782   p = context->current_text;
 783   next = g_utf8_find_next_char (p, end);
 784
 785   while (next)
 786     {
 787       p = next;
 788       next = g_utf8_find_next_char (p, end);
 789     }
 790
 791   /* p is now the start of the last character or character portion. */
 792   g_assert (p != end);
 793   next = g_utf8_next_char (p); /* this only touches *p, nothing beyond */
 794
 795   if (next == end)
 796     {
 797       /* whole character */
 798       context->current_text_end = end;
 799     }
 800   else
 801     {
 802       /* portion */
 803       context->leftover_char_portion = g_string_new_len (p, end - p);
 804       context->current_text_len -= (end - p);
 805       context->current_text_end = p;
 806     }
 807 }
 808
 809 /**
 810  * g_markup_parse_context_parse:
 811  * @context: a #GMarkupParseContext
 812  * @text: chunk of text to parse
 813  * @text_len: length of @text in bytes
 814  * @error: return location for a #GError
 815  *
 816  * Feed some data to the #GMarkupParseContext. The data need not
 817  * be valid UTF-8; an error will be signaled if it's invalid.
 818  * The data need not be an entire document; you can feed a document
 819  * into the parser incrementally, via multiple calls to this function.
 820  * Typically, as you receive data from a network connection or file,
 821  * you feed each received chunk of data into this function, aborting
 822  * the process if an error occurs. Once an error is reported, no further
 823  * data may be fed to the #GMarkupParseContext; all errors are fatal.
 824  *
 825  * Return value: %FALSE if an error occurred, %TRUE on success
 826  **/
 827 gboolean
 828 g_markup_parse_context_parse (GMarkupParseContext *context,
 829                               const gchar         *text,
 830                               gint                 text_len,
 831                               GError             **error)
 832 {
 833   const gchar *first_invalid;
 834
 835   g_return_val_if_fail (context != NULL, FALSE);
 836   g_return_val_if_fail (text != NULL, FALSE);
 837   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
 838   g_return_val_if_fail (!context->parsing, FALSE);
 839
 840   if (text_len < 0)
 841     text_len = strlen (text);
 842
 843   if (text_len == 0)
 844     return TRUE;
 845
 846   context->parsing = TRUE;
 847
 848   if (context->leftover_char_portion)
 849     {
 850       const gchar *first_char;
 851
 852       if ((*text & 0xc0) != 0x80)
 853         first_char = text;
 854       else
 855         first_char = g_utf8_find_next_char (text, text + text_len);
 856
 857       if (first_char)
 858         {
 859           /* leftover_char_portion was completed. Parse it. */
 860           GString *portion = context->leftover_char_portion;
 861
 862           g_string_append_len (context->leftover_char_portion,
 863                                text, first_char - text);
 864
 865           /* hacks to allow recursion */
 866           context->parsing = FALSE;
 867           context->leftover_char_portion = NULL;
 868
 869           if (!g_markup_parse_context_parse (context,
 870                                              portion->str, portion->len,
 871                                              error))
 872             {
 873               g_assert (context->state == STATE_ERROR);
 874             }
 875
 876           g_string_free (portion, TRUE);
 877           context->parsing = TRUE;
 878
 879           /* Skip the fraction of char that was in this text */
 880           text_len -= (first_char - text);
 881           text = first_char;
 882         }
 883       else
 884         {
 885           /* another little chunk of the leftover char; geez
 886            * someone is inefficient.
 887            */
 888           g_string_append_len (context->leftover_char_portion,
 889                                text, text_len);
 890
 891           if (context->leftover_char_portion->len > 7)
 892             {
 893               /* The leftover char portion is too big to be
 894                * a UTF-8 character
 895                */
 896               set_error (context,
 897                          error,
 898                          G_MARKUP_ERROR_BAD_UTF8,
 899                          _("Invalid UTF-8 encoded text"));
 900             }
 901
 902           goto finished;
 903         }
 904     }
 905
 906   context->current_text = text;
 907   context->current_text_len = text_len;
 908   context->iter = context->current_text;
 909   context->start = context->iter;
 910
 911   /* Nothing left after finishing the leftover char, or nothing
 912    * passed in to begin with.
 913    */
 914   if (context->current_text_len == 0)
 915     goto finished;
 916
 917   /* find_current_text_end () assumes the string starts at
 918    * a character start, so we need to validate at least
 919    * that much. It doesn't assume any following bytes
 920    * are valid.
 921    */
 922   if ((*context->current_text & 0xc0) == 0x80) /* not a char start */
 923     {
 924       set_error (context,
 925                  error,
 926                  G_MARKUP_ERROR_BAD_UTF8,
 927                  _("Invalid UTF-8 encoded text"));
 928       goto finished;
 929     }
 930
 931   /* Initialize context->current_text_end, possibly adjusting
 932    * current_text_len, and add any leftover char portion
 933    */
 934   find_current_text_end (context);
 935
 936   /* Validate UTF8 (must be done after we find the end, since
 937    * we could have a trailing incomplete char)
 938    */
 939   if (!g_utf8_validate (context->current_text,
 940                         context->current_text_len,
 941                         &first_invalid))
 942     {
 943       gint newlines = 0;
 944       const gchar *p;
 945       p = context->current_text;
 946       while (p != context->current_text_end)
 947         {
 948           if (*p == '\n')
 949             ++newlines;
 950           ++p;
 951         }
 952
 953       context->line_number += newlines;
 954
 955       set_error (context,
 956                  error,
 957                  G_MARKUP_ERROR_BAD_UTF8,
 958                  _("Invalid UTF-8 encoded text"));
 959       goto finished;
 960     }
 961
 962   while (context->iter != context->current_text_end)
 963     {
 964       switch (context->state)
 965         {
 966         case STATE_START:
 967           /* Possible next state: AFTER_OPEN_ANGLE */
 968
 969           g_assert (context->tag_stack == NULL);
 970
 971           /* whitespace is ignored outside of any elements */
 972           skip_spaces (context);
 973
 974           if (context->iter != context->current_text_end)
 975             {
 976               if (*context->iter == '<')
 977                 {
 978                   /* Move after the open angle */
 979                   advance_char (context);
 980
 981                   context->state = STATE_AFTER_OPEN_ANGLE;
 982
 983                   /* this could start a passthrough */
 984                   context->start = context->iter;
 985
 986                   /* document is now non-empty */
 987                   context->document_empty = FALSE;
 988                 }
 989               else
 990                 {
 991                   set_error (context,
 992                              error,
 993                              G_MARKUP_ERROR_PARSE,
 994                              _("Document must begin with an element (e.g. <book>)"));
 995                 }
 996             }
 997           break;
 998
 999         case STATE_AFTER_OPEN_ANGLE:
1000           /* Possible next states: INSIDE_OPEN_TAG_NAME,
1001            *  AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
1002            */
1003           if (*context->iter == '?' ||
1004               *context->iter == '!')
1005             {
1006               /* include < in the passthrough */
1007               const gchar *openangle = "<";
1008               add_to_partial (context, openangle, openangle + 1);
1009               context->start = context->iter;
1010               context->state = STATE_INSIDE_PASSTHROUGH;
1011             }
1012           else if (*context->iter == '/')
1013             {
1014               /* move after it */
1015               advance_char (context);
1016
1017               context->state = STATE_AFTER_CLOSE_TAG_SLASH;
1018             }
1019           else if (is_name_start_char (g_utf8_get_char (context->iter)))
1020             {
1021               context->state = STATE_INSIDE_OPEN_TAG_NAME;
1022
1023               /* start of tag name */
1024               context->start = context->iter;
1025             }
1026           else
1027             {
1028               gchar buf[7];
1029               set_error (context,
1030                          error,
1031                          G_MARKUP_ERROR_PARSE,
1032                          _("'%s' is not a valid character following "
1033                            "a '<' character; it may not begin an "
1034                            "element name"),
1035                          utf8_str (context->iter, buf));
1036             }
1037           break;
1038
1039           /* The AFTER_CLOSE_ANGLE state is actually sort of
1040            * broken, because it doesn't correspond to a range
1041            * of characters in the input stream as the others do,
1042            * and thus makes things harder to conceptualize
1043            */
1044         case STATE_AFTER_CLOSE_ANGLE:
1045           /* Possible next states: INSIDE_TEXT, STATE_START */
1046           if (context->tag_stack == NULL)
1047             {
1048               context->start = NULL;
1049               context->state = STATE_START;
1050             }
1051           else
1052             {
1053               context->start = context->iter;
1054               context->state = STATE_INSIDE_TEXT;
1055             }
1056           break;
1057
1058         case STATE_AFTER_ELISION_SLASH:
1059           /* Possible next state: AFTER_CLOSE_ANGLE */
1060
1061           {
1062             /* We need to pop the tag stack and call the end_element
1063              * function, since this is the close tag
1064              */
1065             GError *tmp_error = NULL;
1066
1067             g_assert (context->tag_stack != NULL);
1068
1069             tmp_error = NULL;
1070             if (context->parser->end_element)
1071               (* context->parser->end_element) (context,
1072                                                 context->tag_stack->data,
1073                                                 context->user_data,
1074                                                 &tmp_error);
1075
1076             g_free (context->tag_stack->data);
1077             context->tag_stack = g_slist_delete_link (context->tag_stack,
1078                                                       context->tag_stack);
1079
1080             if (tmp_error)
1081               {
1082                 mark_error (context, tmp_error);
1083                 g_propagate_error (error, tmp_error);
1084               }
1085             else
1086               {
1087                 if (*context->iter == '>')
1088                   {
1089                     /* move after the close angle */
1090                     advance_char (context);
1091                     context->state = STATE_AFTER_CLOSE_ANGLE;
1092                   }
1093                 else
1094                   {
1095                     gchar buf[7];
1096                     set_error (context,
1097                                error,
1098                                G_MARKUP_ERROR_PARSE,
1099                                _("Odd character '%s', expected a '>' character "
1100                                  "to end the start tag of element '%s'"),
1101                                utf8_str (context->iter, buf),
1102                                current_element (context));
1103                   }
1104               }
1105           }
1106           break;
1107
1108         case STATE_INSIDE_OPEN_TAG_NAME:
1109           /* Possible next states: BETWEEN_ATTRIBUTES */
1110
1111           /* if there's a partial chunk then it's the first part of the
1112            * tag name. If there's a context->start then it's the start
1113            * of the tag name in current_text, the partial chunk goes
1114            * before that start though.
1115            */
1116           advance_to_name_end (context);
1117
1118           if (context->iter == context->current_text_end)
1119             {
1120               /* The name hasn't necessarily ended. Merge with
1121                * partial chunk, leave state unchanged.
1122                */
1123               add_to_partial (context, context->start, context->iter);
1124             }
1125           else
1126             {
1127               /* The name has ended. Combine it with the partial chunk
1128                * if any; push it on the stack; enter next state.
1129                */
1130               add_to_partial (context, context->start, context->iter);
1131               context->tag_stack =
1132                 g_slist_prepend (context->tag_stack,
1133                                  g_string_free (context->partial_chunk,
1134                                                 FALSE));
1135
1136               context->partial_chunk = NULL;
1137
1138               context->state = STATE_BETWEEN_ATTRIBUTES;
1139               context->start = NULL;
1140             }
1141           break;
1142
1143         case STATE_INSIDE_ATTRIBUTE_NAME:
1144           /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
1145
1146           /* read the full name, if we enter the equals sign state
1147            * then add the attribute to the list (without the value),
1148            * otherwise store a partial chunk to be prepended later.
1149            */
1150           advance_to_name_end (context);
1151
1152           if (context->iter == context->current_text_end)
1153             {
1154               /* The name hasn't necessarily ended. Merge with
1155                * partial chunk, leave state unchanged.
1156                */
1157               add_to_partial (context, context->start, context->iter);
1158             }
1159           else
1160             {
1161               /* The name has ended. Combine it with the partial chunk
1162                * if any; push it on the stack; enter next state.
1163                */
1164               GMarkupAttribute *attr;
1165               add_to_partial (context, context->start, context->iter);
1166
1167               attr = attribute_new (NULL, NULL);
1168
1169               attr->name = g_string_free (context->partial_chunk,
1170                                           FALSE);
1171
1172               context->partial_chunk = NULL;
1173               context->start = NULL;
1174
1175               context->attributes =
1176                 g_slist_prepend (context->attributes, attr);
1177
1178               if (*context->iter == '=')
1179                 {
1180                   advance_char (context);
1181                   context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1182                 }
1183               else
1184                 {
1185                   gchar buf[7];
1186                   set_error (context,
1187                              error,
1188                              G_MARKUP_ERROR_PARSE,
1189                              _("Odd character '%s', expected a '=' after "
1190                                "attribute name '%s' of element '%s'"),
1191                              utf8_str (context->iter, buf),
1192                              attr->name,
1193                              current_element (context));
1194
1195                 }
1196             }
1197           break;
1198
1199         case STATE_BETWEEN_ATTRIBUTES:
1200           /* Possible next states: AFTER_CLOSE_ANGLE,
1201            * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1202            */
1203           skip_spaces (context);
1204
1205           if (context->iter != context->current_text_end)
1206             {
1207               if (*context->iter == '/')
1208                 {
1209                   advance_char (context);
1210                   context->state = STATE_AFTER_ELISION_SLASH;
1211                 }
1212               else if (*context->iter == '>')
1213                 {
1214
1215                   advance_char (context);
1216                   context->state = STATE_AFTER_CLOSE_ANGLE;
1217                 }
1218               else if (is_name_start_char (g_utf8_get_char (context->iter)))
1219                 {
1220                   context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1221                   /* start of attribute name */
1222                   context->start = context->iter;
1223                 }
1224               else
1225                 {
1226                   gchar buf[7];
1227                   set_error (context,
1228                              error,
1229                              G_MARKUP_ERROR_PARSE,
1230                              _("Odd character '%s', expected a '>' or '/' "
1231                                "character to end the start tag of "
1232                                "element '%s', or optionally an attribute; "
1233                                "perhaps you used an invalid character in "
1234                                "an attribute name"),
1235                              utf8_str (context->iter, buf),
1236                              current_element (context));
1237                 }
1238
1239               /* If we're done with attributes, invoke
1240                * the start_element callback
1241                */
1242               if (context->state == STATE_AFTER_ELISION_SLASH ||
1243                   context->state == STATE_AFTER_CLOSE_ANGLE)
1244                 {
1245                   const gchar *start_name;
1246                   gchar **attr_names = NULL;
1247                   gchar **attr_values = NULL;
1248                   GError *tmp_error;
1249
1250                   /* Call user callback for element start */
1251                   start_name = current_element (context);
1252
1253                   /* this gratuituously copies the attr names/values
1254                    * I guess
1255                    */
1256                   attribute_list_to_arrays (context->attributes,
1257                                             &attr_names,
1258                                             &attr_values,
1259                                             NULL);
1260
1261                   tmp_error = NULL;
1262                   if (context->parser->start_element)
1263                     (* context->parser->start_element) (context,
1264                                                         start_name,
1265                                                         (const gchar **)attr_names,
1266                                                         (const gchar **)attr_values,
1267                                                         context->user_data,
1268                                                         &tmp_error);
1269
1270                   g_strfreev (attr_names);
1271                   g_strfreev (attr_values);
1272
1273                   /* Go ahead and free this. */
1274                   g_slist_foreach (context->attributes, (GFunc)attribute_free,
1275                                    NULL);
1276                   g_slist_free (context->attributes);
1277                   context->attributes = NULL;
1278
1279                   if (tmp_error != NULL)
1280                     {
1281                       mark_error (context, tmp_error);
1282                       g_propagate_error (error, tmp_error);
1283                     }
1284                 }
1285             }
1286           break;
1287
1288         case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1289           /* Possible next state: INSIDE_ATTRIBUTE_VALUE */
1290           if (*context->iter == '"')
1291             {
1292               advance_char (context);
1293               context->state = STATE_INSIDE_ATTRIBUTE_VALUE;
1294               context->start = context->iter;
1295             }
1296           else
1297             {
1298               gchar buf[7];
1299               set_error (context,
1300                          error,
1301                          G_MARKUP_ERROR_PARSE,
1302                          _("Odd character '%s', expected an open quote mark "
1303                            "after the equals sign when giving value for "
1304                            "attribute '%s' of element '%s'"),
1305                          utf8_str (context->iter, buf),
1306                          current_attribute (context),
1307                          current_element (context));
1308             }
1309           break;
1310
1311         case STATE_INSIDE_ATTRIBUTE_VALUE:
1312           /* Possible next states: BETWEEN_ATTRIBUTES */
1313           do
1314             {
1315               if (*context->iter == '"')
1316                 break;
1317             }
1318           while (advance_char (context));
1319
1320           if (context->iter == context->current_text_end)
1321             {
1322               /* The value hasn't necessarily ended. Merge with
1323                * partial chunk, leave state unchanged.
1324                */
1325               add_to_partial (context, context->start, context->iter);
1326             }
1327           else
1328             {
1329               /* The value has ended at the quote mark. Combine it
1330                * with the partial chunk if any; set it for the current
1331                * attribute.
1332                */
1333               GMarkupAttribute *attr;
1334
1335               add_to_partial (context, context->start, context->iter);
1336
1337               attr = context->attributes->data;
1338
1339               if (unescape_text (context,
1340                                  context->partial_chunk->str,
1341                                  context->partial_chunk->str +
1342                                  context->partial_chunk->len,
1343                                  &attr->value,
1344                                  error))
1345                 {
1346                   /* success, advance past quote and set state. */
1347                   advance_char (context);
1348                   context->state = STATE_BETWEEN_ATTRIBUTES;
1349                   context->start = NULL;
1350                 }
1351
1352               free_partial (context);
1353             }
1354           break;
1355
1356         case STATE_INSIDE_TEXT:
1357           /* Possible next states: AFTER_OPEN_ANGLE */
1358           do
1359             {
1360               if (*context->iter == '<')
1361                 break;
1362             }
1363           while (advance_char (context));
1364
1365           /* The text hasn't necessarily ended. Merge with
1366            * partial chunk, leave state unchanged.
1367            */
1368
1369           add_to_partial (context, context->start, context->iter);
1370
1371           if (context->iter != context->current_text_end)
1372             {
1373               gchar *unescaped = NULL;
1374
1375               /* The text has ended at the open angle. Call the text
1376                * callback.
1377                */
1378
1379               if (unescape_text (context,
1380                                  context->partial_chunk->str,
1381                                  context->partial_chunk->str +
1382                                  context->partial_chunk->len,
1383                                  &unescaped,
1384                                  error))
1385                 {
1386                   GError *tmp_error = NULL;
1387
1388                   if (context->parser->text)
1389                     (*context->parser->text) (context,
1390                                               unescaped,
1391                                               strlen (unescaped),
1392                                               context->user_data,
1393                                               &tmp_error);
1394
1395                   g_free (unescaped);
1396
1397                   if (tmp_error == NULL)
1398                     {
1399                       /* advance past open angle and set state. */
1400                       advance_char (context);
1401                       context->state = STATE_AFTER_OPEN_ANGLE;
1402                       /* could begin a passthrough */
1403                       context->start = context->iter;
1404                     }
1405                   else
1406                     {
1407                       mark_error (context, tmp_error);
1408                       g_propagate_error (error, tmp_error);
1409                     }
1410                 }
1411
1412               free_partial (context);
1413             }
1414           break;
1415
1416         case STATE_AFTER_CLOSE_TAG_SLASH:
1417           /* Possible next state: INSIDE_CLOSE_TAG_NAME */
1418           if (is_name_start_char (g_utf8_get_char (context->iter)))
1419             {
1420               context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1421
1422               /* start of tag name */
1423               context->start = context->iter;
1424             }
1425           else
1426             {
1427               gchar buf[7];
1428               set_error (context,
1429                          error,
1430                          G_MARKUP_ERROR_PARSE,
1431                          _("'%s' is not a valid character following "
1432                            "the characters '</'; '%s' may not begin an "
1433                            "element name"),
1434                          utf8_str (context->iter, buf),
1435                          utf8_str (context->iter, buf));
1436             }
1437           break;
1438
1439         case STATE_INSIDE_CLOSE_TAG_NAME:
1440           /* Possible next state: AFTER_CLOSE_ANGLE */
1441           advance_to_name_end (context);
1442
1443           if (context->iter == context->current_text_end)
1444             {
1445               /* The name hasn't necessarily ended. Merge with
1446                * partial chunk, leave state unchanged.
1447                */
1448               add_to_partial (context, context->start, context->iter);
1449             }
1450           else
1451             {
1452               /* The name has ended. Combine it with the partial chunk
1453                * if any; check that it matches stack top and pop
1454                * stack; invoke proper callback; enter next state.
1455                */
1456               gchar *close_name;
1457
1458               add_to_partial (context, context->start, context->iter);
1459
1460               close_name = g_string_free (context->partial_chunk, FALSE);
1461               context->partial_chunk = NULL;
1462
1463               if (context->tag_stack == NULL)
1464                 {
1465                   set_error (context,
1466                              error,
1467                              G_MARKUP_ERROR_PARSE,
1468                              _("Element '%s' was closed, no element "
1469                                "is currently open"),
1470                              close_name);
1471                 }
1472               else if (strcmp (close_name, current_element (context)) != 0)
1473                 {
1474                   set_error (context,
1475                              error,
1476                              G_MARKUP_ERROR_PARSE,
1477                              _("Element '%s' was closed, but the currently "
1478                                "open element is '%s'"),
1479                              close_name,
1480                              current_element (context));
1481                 }
1482               else if (*context->iter != '>')
1483                 {
1484                   gchar buf[7];
1485                   set_error (context,
1486                              error,
1487                              G_MARKUP_ERROR_PARSE,
1488                              _("'%s' is not a valid character following "
1489                                "the close element name '%s'; the allowed "
1490                                "character is '>'"),
1491                              utf8_str (context->iter, buf),
1492                              close_name);
1493                 }
1494               else
1495                 {
1496                   GError *tmp_error;
1497                   advance_char (context);
1498                   context->state = STATE_AFTER_CLOSE_ANGLE;
1499                   context->start = NULL;
1500
1501                   /* call the end_element callback */
1502                   tmp_error = NULL;
1503                   if (context->parser->end_element)
1504                     (* context->parser->end_element) (context,
1505                                                       close_name,
1506                                                       context->user_data,
1507                                                       &tmp_error);
1508
1509
1510                   /* Pop the tag stack */
1511                   g_free (context->tag_stack->data);
1512                   context->tag_stack = g_slist_delete_link (context->tag_stack,
1513                                                             context->tag_stack);
1514
1515                   if (tmp_error)
1516                     {
1517                       mark_error (context, tmp_error);
1518                       g_propagate_error (error, tmp_error);
1519                     }
1520                 }
1521
1522               g_free (close_name);
1523             }
1524           break;
1525
1526         case STATE_INSIDE_PASSTHROUGH:
1527           /* Possible next state: AFTER_CLOSE_ANGLE */
1528           do
1529             {
1530               if (*context->iter == '>')
1531                 break;
1532             }
1533           while (advance_char (context));
1534
1535           if (context->iter == context->current_text_end)
1536             {
1537               /* The passthrough hasn't necessarily ended. Merge with
1538                * partial chunk, leave state unchanged.
1539                */
1540               add_to_partial (context, context->start, context->iter);
1541             }
1542           else
1543             {
1544               /* The passthrough has ended at the close angle. Combine
1545                * it with the partial chunk if any. Call the passthrough
1546                * callback. Note that the open/close angles are
1547                * included in the text of the passthrough.
1548                */
1549               GError *tmp_error = NULL;
1550
1551               advance_char (context); /* advance past close angle */
1552               add_to_partial (context, context->start, context->iter);
1553
1554               if (context->parser->passthrough)
1555                 (*context->parser->passthrough) (context,
1556                                                  context->partial_chunk->str,
1557                                                  context->partial_chunk->len,
1558                                                  context->user_data,
1559                                                  &tmp_error);
1560
1561               free_partial (context);
1562
1563               if (tmp_error == NULL)
1564                 {
1565                   context->state = STATE_AFTER_CLOSE_ANGLE;
1566                   context->start = context->iter; /* could begin text */
1567                 }
1568               else
1569                 {
1570                   mark_error (context, tmp_error);
1571                   g_propagate_error (error, tmp_error);
1572                 }
1573             }
1574           break;
1575
1576         case STATE_ERROR:
1577           goto finished;
1578           break;
1579
1580         default:
1581           g_assert_not_reached ();
1582           break;
1583         }
1584     }
1585
1586  finished:
1587   context->parsing = FALSE;
1588
1589   return context->state != STATE_ERROR;
1590 }
1591
1592 /**
1593  * g_markup_parse_context_end_parse:
1594  * @context: a #GMarkupParseContext
1595  * @error: return location for a #GError
1596  *
1597  * Signals to the #GMarkupParseContext that all data has been
1598  * fed into the parse context with g_markup_parse_context_parse().
1599  * This function reports an error if the document isn't complete,
1600  * for example if elements are still open.
1601  *
1602  * Return value: %TRUE on success, %FALSE if an error was set
1603  **/
1604 gboolean
1605 g_markup_parse_context_end_parse (GMarkupParseContext *context,
1606                                   GError             **error)
1607 {
1608   g_return_val_if_fail (context != NULL, FALSE);
1609   g_return_val_if_fail (!context->parsing, FALSE);
1610   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1611
1612   if (context->document_empty)
1613     {
1614       set_error (context, error, G_MARKUP_ERROR_EMPTY,
1615                  _("Document was empty or contained only whitespace"));
1616       return FALSE;
1617     }
1618
1619   context->parsing = TRUE;
1620
1621   switch (context->state)
1622     {
1623     case STATE_START:
1624       /* Nothing to do */
1625       break;
1626
1627     case STATE_AFTER_OPEN_ANGLE:
1628       set_error (context, error, G_MARKUP_ERROR_PARSE,
1629                  _("Document ended unexpectedly just after an open angle bracket '<'"));
1630       break;
1631
1632     case STATE_AFTER_CLOSE_ANGLE:
1633       if (context->tag_stack != NULL)
1634         {
1635           /* Error message the same as for INSIDE_TEXT */
1636           set_error (context, error, G_MARKUP_ERROR_PARSE,
1637                      _("Document ended unexpectedly with elements still open - "
1638                        "'%s' was the last element opened"),
1639                      current_element (context));
1640         }
1641       break;
1642
1643     case STATE_AFTER_ELISION_SLASH:
1644       set_error (context, error, G_MARKUP_ERROR_PARSE,
1645                  _("Document ended unexpectedly, expected to see a close angle "
1646                    "bracket ending the tag <%s/>"), current_element (context));
1647       break;
1648
1649     case STATE_INSIDE_OPEN_TAG_NAME:
1650       set_error (context, error, G_MARKUP_ERROR_PARSE,
1651                  _("Document ended unexpectedly inside an element name"));
1652       break;
1653
1654     case STATE_INSIDE_ATTRIBUTE_NAME:
1655       set_error (context, error, G_MARKUP_ERROR_PARSE,
1656                  _("Document ended unexpectedly inside an attribute name"));
1657       break;
1658
1659     case STATE_BETWEEN_ATTRIBUTES:
1660       set_error (context, error, G_MARKUP_ERROR_PARSE,
1661                  _("Document ended unexpectedly inside an element-opening "
1662                    "tag."));
1663       break;
1664
1665     case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1666       set_error (context, error, G_MARKUP_ERROR_PARSE,
1667                  _("Document ended unexpectedly after the equals sign "
1668                    "following an attribute name; no attribute value"));
1669       break;
1670
1671     case STATE_INSIDE_ATTRIBUTE_VALUE:
1672       set_error (context, error, G_MARKUP_ERROR_PARSE,
1673                  _("Document ended unexpectedly while inside an attribute "
1674                    "value"));
1675       break;
1676
1677     case STATE_INSIDE_TEXT:
1678       g_assert (context->tag_stack != NULL);
1679       set_error (context, error, G_MARKUP_ERROR_PARSE,
1680                  _("Document ended unexpectedly with elements still open - "
1681                    "'%s' was the last element opened"),
1682                  current_element (context));
1683       break;
1684
1685     case STATE_AFTER_CLOSE_TAG_SLASH:
1686     case STATE_INSIDE_CLOSE_TAG_NAME:
1687       set_error (context, error, G_MARKUP_ERROR_PARSE,
1688                  _("Document ended unexpectedly inside the close tag for"
1689                    "element '%s'"), current_element);
1690       break;
1691
1692     case STATE_INSIDE_PASSTHROUGH:
1693       set_error (context, error, G_MARKUP_ERROR_PARSE,
1694                  _("Document ended unexpectedly inside a comment or "
1695                    "processing instruction"));
1696       break;
1697
1698     case STATE_ERROR:
1699     default:
1700       g_assert_not_reached ();
1701       break;
1702     }
1703
1704   context->parsing = FALSE;
1705
1706   return context->state != STATE_ERROR;
1707 }
1708
1709 /**
1710  * g_markup_parse_context_get_position:
1711  * @context: a #GMarkupParseContext
1712  * @line_number: return location for a line number, or %NULL
1713  * @char_number: return location for a char-on-line number, or %NULL
1714  *
1715  * Retrieves the current line number and the number of the character on
1716  * that line. Intended for use in error messages; there are no strict
1717  * semantics for what constitutes the "current" line number other than
1718  * "the best number we could come up with for error messages."
1719  *
1720  **/
1721 void
1722 g_markup_parse_context_get_position (GMarkupParseContext *context,
1723                                      gint                *line_number,
1724                                      gint                *char_number)
1725 {
1726   g_return_if_fail (context != NULL);
1727
1728   if (line_number)
1729     *line_number = context->line_number;
1730
1731   if (char_number)
1732     *char_number = context->char_number;
1733 }
1734
1735 static void
1736 append_escaped_text (GString     *str,
1737                      const gchar *text,
1738                      gint         length)
1739 {
1740   const gchar *p;
1741   const gchar *end;
1742
1743   p = text;
1744   end = text + length;
1745
1746   while (p != end)
1747     {
1748       const gchar *next;
1749       next = g_utf8_next_char (p);
1750
1751       switch (*p)
1752         {
1753         case '&':
1754           g_string_append (str, "&amp;");
1755           break;
1756
1757         case '<':
1758           g_string_append (str, "&lt;");
1759           break;
1760
1761         case '>':
1762           g_string_append (str, "&gt;");
1763           break;
1764
1765         case '\'':
1766           g_string_append (str, "&apos;");
1767           break;
1768
1769         case '"':
1770           g_string_append (str, "&quot;");
1771           break;
1772
1773         default:
1774           g_string_append_len (str, p, next - p);
1775           break;
1776         }
1777
1778       p = next;
1779     }
1780 }
1781
1782 /**
1783  * g_markup_escape_text:
1784  * @text: some valid UTF-8 text
1785  * @length: length of @text in bytes
1786  *
1787  * Escapes text so that the markup parser will parse it verbatim.
1788  * Less than, greater than, ampersand, etc. are replaced with the
1789  * corresponding entities. This function would typically be used
1790  * when writing out a file to be parsed with the markup parser.
1791  *
1792  * Return value: escaped text
1793  **/
1794 gchar*
1795 g_markup_escape_text (const gchar *text,
1796                       gint         length)
1797 {
1798   GString *str;
1799
1800   g_return_val_if_fail (text != NULL, NULL);
1801
1802   if (length < 0)
1803     length = strlen (text);
1804
1805   str = g_string_new ("");
1806   append_escaped_text (str, text, length);
1807
1808   return g_string_free (str, FALSE);
1809 }