*
* You should have received a copy of the GNU Lesser General Public
* License along with GLib; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
+ * see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#include "gmarkup.h"
+#include "gatomic.h"
#include "gslice.h"
#include "galloca.h"
#include "gstrfuncs.h"
* SECTION:markup
* @Title: Simple XML Subset Parser
* @Short_description: parses a subset of XML
- * @See_also: <ulink url="http://www.w3.org/TR/REC-xml/">XML
- * Specification</ulink>
+ * @See_also: [XML Specification](http://www.w3.org/TR/REC-xml/)
*
* The "GMarkup" parser is intended to parse a simple markup format
* that's a subset of XML. This is a small, efficient, easy-to-use
*
* GMarkup is not guaranteed to signal an error on all invalid XML;
* the parser may accept documents that an XML parser would not.
- * However, XML documents which are not well-formed<footnote
- * id="wellformed">Being wellformed is a weaker condition than being
- * valid. See the <ulink url="http://www.w3.org/TR/REC-xml/">XML
- * specification</ulink> for definitions of these terms.</footnote>
- * are not considered valid GMarkup documents.
+ * However, XML documents which are not well-formed (a weaker
+ * condition than being valid; see the
+ * [XML specification](http://www.w3.org/TR/REC-xml/)
+ * for definitions of these terms) are not considered valid GMarkup
+ * documents.
*
* Simplifications to XML include:
- * <itemizedlist>
- * <listitem>Only UTF-8 encoding is allowed</listitem>
- * <listitem>No user-defined entities</listitem>
- * <listitem>Processing instructions, comments and the doctype declaration
- * are "passed through" but are not interpreted in any way</listitem>
- * <listitem>No DTD or validation.</listitem>
- * </itemizedlist>
+ *
+ * - Only UTF-8 encoding is allowed
+ *
+ * - No user-defined entities
+ *
+ * - Processing instructions, comments and the doctype declaration
+ * are "passed through" but are not interpreted in any way
+ *
+ * - No DTD or validation
*
* The markup format does support:
- * <itemizedlist>
- * <listitem>Elements</listitem>
- * <listitem>Attributes</listitem>
- * <listitem>5 standard entities:
- * <literal>&amp; &lt; &gt; &quot; &apos;</literal>
- * </listitem>
- * <listitem>Character references</listitem>
- * <listitem>Sections marked as CDATA</listitem>
- * </itemizedlist>
+ *
+ * - Elements
+ *
+ * - Attributes
+ *
+ * - 5 standard entities: &amp; &lt; &gt; &quot; &apos;
+ *
+ * - Character references
+ *
+ * - Sections marked as CDATA
*/
G_DEFINE_QUARK (g-markup-error-quark, g_markup_error)
{
const GMarkupParser *parser;
+ volatile gint ref_count;
+
GMarkupParseFlags flags;
gint line_number;
* the parse context can't continue to parse text (you have to
* free it and create a new parse context).
*
- * Return value: a new #GMarkupParseContext
+ * Returns: a new #GMarkupParseContext
**/
GMarkupParseContext *
g_markup_parse_context_new (const GMarkupParser *parser,
context = g_new (GMarkupParseContext, 1);
+ context->ref_count = 1;
context->parser = parser;
context->flags = flags;
context->user_data = user_data;
return context;
}
+/**
+ * g_markup_parse_context_ref:
+ * @context: a #GMarkupParseContext
+ *
+ * Increases the reference count of @context.
+ *
+ * The increment is performed with g_atomic_int_inc() on the context's
+ * ref_count field. Before incrementing, @context is checked for being
+ * non-%NULL and for having a reference count greater than zero.
+ *
+ * Returns: the same @context (the precondition checks are written to
+ *   yield %NULL when @context is %NULL or its reference count is not
+ *   positive)
+ *
+ * Since: 2.36
+ **/
+GMarkupParseContext *
+g_markup_parse_context_ref (GMarkupParseContext *context)
+{
+ g_return_val_if_fail (context != NULL, NULL);
+ g_return_val_if_fail (context->ref_count > 0, NULL); /* a count <= 0 means the context is not in a usable state */
+
+ g_atomic_int_inc (&context->ref_count); /* atomic increment of the shared counter */
+
+ return context; /* returned so the call can be used inline, e.g. in an assignment */
+}
+
+/**
+ * g_markup_parse_context_unref:
+ * @context: a #GMarkupParseContext
+ *
+ * Decreases the reference count of @context. When its reference count
+ * drops to 0, it is freed.
+ *
+ * The decrement is performed with g_atomic_int_dec_and_test(); when
+ * that reports that the count reached zero, the context is released
+ * by calling g_markup_parse_context_free(). Before decrementing,
+ * @context is checked for being non-%NULL and for having a reference
+ * count greater than zero.
+ *
+ * Since: 2.36
+ **/
+void
+g_markup_parse_context_unref (GMarkupParseContext *context)
+{
+ g_return_if_fail (context != NULL);
+ g_return_if_fail (context->ref_count > 0); /* guards against dropping a reference that is not held */
+
+ if (g_atomic_int_dec_and_test (&context->ref_count)) /* TRUE only for the decrement that reaches zero */
+ g_markup_parse_context_free (context);
+}
+
static void
string_full_free (gpointer ptr)
{
g_propagate_error (error, tmp_error);
}
+G_GNUC_PRINTF(4, 5)
static void
set_error (GMarkupParseContext *context,
GError **error,
return buf;
}
+G_GNUC_PRINTF(5, 6)
static void
set_unescape_error (GMarkupParseContext *context,
GError **error,
normalize_attribute = FALSE;
/*
- * Meeks' theorum: unescaping can only shrink text.
+ * Meeks' theorem: unescaping can only shrink text.
* for &lt; etc. this is obvious, for &#xffff; more
* thought is required, but this is patently so.
*/
"inside a character reference "
"(ê for example) - perhaps "
"the digit is too large"),
- end - from, from);
+ (int)(end - from), from);
return FALSE;
}
else if (*end != ';')
from, G_MARKUP_ERROR_PARSE,
_("Character reference '%-.*s' does not "
"encode a permitted character"),
- end - from, from);
+ (int)(end - from), from);
return FALSE;
}
}
set_unescape_error (context, error,
from, G_MARKUP_ERROR_PARSE,
_("Entity name '%-.*s' is not known"),
- end-from, from);
+ (int)(end - from), from);
else
set_unescape_error (context, error,
from, G_MARKUP_ERROR_PARSE,
emit_start_element (GMarkupParseContext *context,
GError **error)
{
- int i;
+ int i, j = 0;
const gchar *start_name;
const gchar **attr_names;
const gchar **attr_values;
GError *tmp_error;
+ /* In case we want to ignore qualified tags and we see that we have
+ * one here, we push a subparser. This will ignore all tags inside of
+ * the qualified tag.
+ *
+ * We deal with the end of the subparser from emit_end_element.
+ */
+ if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (current_element (context), ':'))
+ {
+ static const GMarkupParser ignore_parser;
+ g_markup_parse_context_push (context, &ignore_parser, NULL);
+ clear_attributes (context);
+ return;
+ }
+
attr_names = g_newa (const gchar *, context->cur_attr + 2);
attr_values = g_newa (const gchar *, context->cur_attr + 2);
for (i = 0; i < context->cur_attr + 1; i++)
{
- attr_names[i] = context->attr_names[i]->str;
- attr_values[i] = context->attr_values[i]->str;
+ /* Possibly omit qualified attribute names from the list */
+ if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (context->attr_names[i]->str, ':'))
+ continue;
+
+ attr_names[j] = context->attr_names[i]->str;
+ attr_values[j] = context->attr_values[i]->str;
+ j++;
}
- attr_names[i] = NULL;
- attr_values[i] = NULL;
+ attr_names[j] = NULL;
+ attr_values[j] = NULL;
/* Call user callback for element start */
tmp_error = NULL;
propagate_error (context, error, tmp_error);
}
+static void
+emit_end_element (GMarkupParseContext *context,
+ GError **error)
+{
+ /* Common handling for the end of the current element: report it to
+ * the parser's end_element callback (if one is set) and remove it
+ * from the tag stack.
+ *
+ * We need to pop the tag stack and call the end_element
+ * function, since this is the close tag
+ *
+ * @context: the parse context; its tag stack must not be empty
+ * @error: location that receives an error raised by the callback
+ */
+ GError *tmp_error = NULL;
+
+ g_assert (context->tag_stack != NULL);
+
+ possibly_finish_subparser (context);
+
+ /* We might have just returned from our ignore subparser.
+ *
+ * With G_MARKUP_IGNORE_QUALIFIED set, emit_start_element pushes a
+ * subparser for an element whose name contains ':' instead of
+ * calling the user's start_element. Mirror that here: pop the
+ * subparser, drop the tag, and return without calling end_element.
+ */
+ if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (current_element (context), ':'))
+ {
+ g_markup_parse_context_pop (context);
+ pop_tag (context);
+ return;
+ }
+
+ tmp_error = NULL; /* redundant with the initializer at the declaration */
+ if (context->parser->end_element) /* the callback is optional */
+ (* context->parser->end_element) (context,
+ current_element (context),
+ context->user_data,
+ &tmp_error);
+
+ ensure_no_outstanding_subparser (context);
+
+ if (tmp_error) /* the callback reported an error */
+ {
+ mark_error (context, tmp_error);
+ g_propagate_error (error, tmp_error);
+ }
+
+ pop_tag (context); /* done whether or not the callback set an error */
+}
+
/**
* g_markup_parse_context_parse:
* @context: a #GMarkupParseContext
* is reported, no further data may be fed to the #GMarkupParseContext;
* all errors are fatal.
*
- * Return value: %FALSE if an error occurred, %TRUE on success
+ * Returns: %FALSE if an error occurred, %TRUE on success
*/
gboolean
g_markup_parse_context_parse (GMarkupParseContext *context,
case STATE_AFTER_ELISION_SLASH:
/* Possible next state: AFTER_CLOSE_ANGLE */
+ if (*context->iter == '>')
+ {
+ /* move after the close angle */
+ advance_char (context);
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ emit_end_element (context, error);
+ }
+ else
+ {
+ gchar buf[8];
- {
- /* We need to pop the tag stack and call the end_element
- * function, since this is the close tag
- */
- GError *tmp_error = NULL;
-
- g_assert (context->tag_stack != NULL);
-
- possibly_finish_subparser (context);
-
- tmp_error = NULL;
- if (context->parser->end_element)
- (* context->parser->end_element) (context,
- current_element (context),
- context->user_data,
- &tmp_error);
-
- ensure_no_outstanding_subparser (context);
-
- if (tmp_error)
- {
- mark_error (context, tmp_error);
- g_propagate_error (error, tmp_error);
- }
- else
- {
- if (*context->iter == '>')
- {
- /* move after the close angle */
- advance_char (context);
- context->state = STATE_AFTER_CLOSE_ANGLE;
- }
- else
- {
- gchar buf[8];
-
- set_error (context,
- error,
- G_MARKUP_ERROR_PARSE,
- _("Odd character '%s', expected a '>' character "
- "to end the empty-element tag '%s'"),
- utf8_str (context->iter, buf),
- current_element (context));
- }
- }
- pop_tag (context);
- }
+ set_error (context,
+ error,
+ G_MARKUP_ERROR_PARSE,
+ _("Odd character '%s', expected a '>' character "
+ "to end the empty-element tag '%s'"),
+ utf8_str (context->iter, buf),
+ current_element (context));
+ }
break;
case STATE_INSIDE_OPEN_TAG_NAME:
}
else
{
- GError *tmp_error;
advance_char (context);
context->state = STATE_AFTER_CLOSE_ANGLE;
context->start = NULL;
- possibly_finish_subparser (context);
-
- /* call the end_element callback */
- tmp_error = NULL;
- if (context->parser->end_element)
- (* context->parser->end_element) (context,
- close_name->str,
- context->user_data,
- &tmp_error);
-
- ensure_no_outstanding_subparser (context);
- pop_tag (context);
-
- if (tmp_error)
- propagate_error (context, error, tmp_error);
+ emit_end_element (context, error);
}
context->partial_chunk = close_name;
truncate_partial (context);
* This function reports an error if the document isn't complete,
* for example if elements are still open.
*
- * Return value: %TRUE on success, %FALSE if an error was set
+ * Returns: %TRUE on success, %FALSE if an error was set
*/
gboolean
g_markup_parse_context_end_parse (GMarkupParseContext *context,
* As an example, see the following implementation of a simple
* parser that counts the number of tags encountered.
*
- * |[
+ * |[<!-- language="C" -->
* typedef struct
* {
* gint tag_count;
* In order to allow this parser to be easily used as a subparser, the
* following interface is provided:
*
- * |[
+ * |[<!-- language="C" -->
* void
* start_counting (GMarkupParseContext *context)
* {
*
* The subparser would then be used as follows:
*
- * |[
+ * |[<!-- language="C" -->
* static void start_element (context, element_name, ...)
* {
* if (strcmp (element_name, "count-these") == 0)
* start_counting (context);
*
- * /* else, handle other tags... */
+ * // else, handle other tags...
* }
*
* static void end_element (context, element_name, ...)
* if (strcmp (element_name, "count-these") == 0)
* g_print ("Counted %d tags\n", end_counting (context));
*
- * /* else, handle other tags... */
+ * // else, handle other tags...
* }
* ]|
*
* of line endings and attribute values.
*
* Note also that this function will produce character references in
- * the range of &#x1; ... &#x1f; for all control sequences
+ * the range of &#x1; ... &#x1f; for all control sequences
* except for tabstop, newline and carriage return. The character
* references in this range are not valid XML 1.0, but they are
* valid XML 1.1 and will be accepted by the GMarkup parser.
*
- * Return value: a newly allocated string with the escaped text
+ * Returns: a newly allocated string with the escaped text
*/
gchar*
g_markup_escape_text (const gchar *text,
* Partially based on code from printf-parser.c,
* Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
*
- * Return value: pointer to the next conversion in @format,
+ * Returns: pointer to the next conversion in @format,
* or %NULL, if none.
*/
static const char *
* all string and character arguments in the fashion
* of g_markup_escape_text(). See g_markup_printf_escaped().
*
- * Return value: newly allocated result from formatting
+ * Returns: newly allocated result from formatting
* operation. Free with g_free().
*
* Since: 2.4
*/
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+
gchar *
g_markup_vprintf_escaped (const gchar *format,
va_list args)
* To find the span of the first argument, we find the first position
* where the two arguments differ, which tells us that the first
* argument formatted to "Susan & Fred". We then escape that
- * to "Susan & Fred" and join up with the intermediate portions
+ * to "Susan &amp; Fred" and join up with the intermediate portions
* of the format string and the second argument to get
- * "Susan & Fred ate 5 apples".
+ * "Susan &amp; Fred ate 5 apples".
*/
/* Create the two modified format strings
G_VA_COPY (args2, args);
output1 = g_strdup_vprintf (format1->str, args);
+
if (!output1)
{
va_end (args2);
va_end (args2);
if (!output2)
goto cleanup;
-
result = g_string_new (NULL);
/* Iterate through the original format string again,
return NULL;
}
+#pragma GCC diagnostic pop
+
/**
* g_markup_printf_escaped:
* @format: printf() style format string
* output, without having to worry that the strings
* might themselves contain markup.
*
- * |[
- * const char *store = "Fortnum & Mason";
+ * |[<!-- language="C" -->
+ * const char *store = "Fortnum & Mason";
* const char *item = "Tea";
* char *output;
- *
- * output = g_markup_printf_escaped ("<purchase>"
- * "<store>%s</store>"
- * "<item>%s</item>"
- * "</purchase>",
+ *
+ * output = g_markup_printf_escaped ("<purchase>"
+ * "<store>%s</store>"
+ * "<item>%s</item>"
+ * "</purchase>",
* store, item);
* ]|
*
- * Return value: newly allocated result from formatting
+ * Returns: newly allocated result from formatting
* operation. Free with g_free().
*
* Since: 2.4
* %G_MARKUP_ERROR_INVALID_CONTENT). In all of these cases %FALSE
* will be returned and @error will be set as appropriate.
*
- * Return value: %TRUE if successful
+ * Returns: %TRUE if successful
*
* Since: 2.16
**/