1 /* json-path.c - JSONPath implementation
3 * This file is part of JSON-GLib
4 * Copyright © 2011 Intel Corp.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
20 * Emmanuele Bassi <ebassi@linux.intel.com>
26 * @short_description: JSONPath implementation
28 * #JsonPath is a simple class implementing the JSONPath syntax for extracting
29 * data out of a JSON tree. While the semantics of the JSONPath expressions are
30 * heavily borrowed from the XPath specification for XML, the syntax follows the
31 * ECMAScript origins of JSON.
33 * Once a #JsonPath instance has been created, it has to compile a JSONPath
34 * expression using json_path_compile() before being able to match it to a
35 * JSON tree; the same #JsonPath instance can be used to match multiple JSON
36 * trees. It is also possible to compile a new JSONPath expression using the
37 * same #JsonPath instance; the previous expression will be discarded only if
38 * the compilation of the new expression is successful.
40 * The simple convenience function json_path_query() can be used for one-off
43 * <refsect2 id="json-path-syntax">
44 * <title>Syntax of the JSONPath expressions</title>
45 * <para>A JSONPath expression is composed of path indices and operators.
46 * Each path index can either be a member name or an element index inside
47 * a JSON tree. A JSONPath expression must start with the '$' operator; each
48 * path index is separated using either the dot notation or the bracket
49 * notation, e.g.:</para>
51 * /* dot notation */
52 * $.store.book[0].title
53 * /* bracket notation */
54 * $['store']['book'][0]['title']
56 * <para>The available operators are:</para>
57 * <table frame='all' id="json-path-operators">
58 * <title>Operators</title>
60 * <colspec name='operator'/>
61 * <colspec name='description'/>
62 * <colspec name='example'/>
63 * <colspec name='results'/>
66 * <entry>Operator</entry>
67 * <entry>Description</entry>
68 * <entry>Example</entry>
69 * <entry>Results</entry>
75 * <entry>The root node</entry>
77 * <entry>The whole document</entry>
80 * <entry>. or []</entry>
81 * <entry>The child member or element</entry>
82 * <entry>$.store.book</entry>
83 * <entry>The contents of the book member of the store object</entry>
87 * <entry>Recursive descent</entry>
88 * <entry>$..author</entry>
89 * <entry>The content of the author member in every object</entry>
93 * <entry>Wildcard</entry>
94 * <entry>$.store.book[*].author</entry>
95 * <entry>The content of the author member of any object of the
96 * array contained in the book member of the store object</entry>
100 * <entry>Subscript</entry>
101 * <entry>$.store.book[0]</entry>
102 * <entry>The first element of the array contained in the book
103 * member of the store object</entry>
108 * <entry>$.store.book[0,1]</entry>
109 * <entry>The first two elements of the array contained in the
110 * book member of the store object</entry>
113 * <entry>[start:end:step]</entry>
114 * <entry>Slice</entry>
115 * <entry>$.store.book[:2]</entry>
116 * <entry>The first two elements of the array contained in the
117 * book member of the store object; the start and step are omitted
118 * and implied to be 0 and 1, respectively</entry>
123 * <para>More information about JSONPath is available on Stefan Gössner's
124 * <ulink url="http://goessner.net/articles/JsonPath/">website</ulink>.</para>
127 * <example id="json-path-example">
128 * <title>Example of JsonPath usage</title>
129 * <para>The following example shows some of the results of using #JsonPath
130 * on a JSON tree. We use the following JSON description of a
132 * <programlisting><![CDATA[
135 { "category": "reference",
136 "author": "Nigel Rees",
137 "title": "Sayings of the Century",
140 { "category": "fiction",
141 "author": "Evelyn Waugh",
142 "title": "Sword of Honour",
145 { "category": "fiction",
146 "author": "Herman Melville",
147 "title": "Moby Dick",
148 "isbn": "0-553-21311-3",
151 { "category": "fiction",
152 "author": "J. R. R. Tolkien",
153 "title": "The Lord of the Rings",
154 "isbn": "0-395-19395-8",
165 * <para>We can parse the JSON using #JsonParser:</para>
167 * JsonParser *parser = json_parser_new ();
168 * json_parser_load_from_data (parser, json_data, -1, NULL);
170 * <para>If we run the following code:</para>
173 * JsonPath *path = json_path_new ();
174 * json_path_compile (path, "$.store..author", NULL);
175 * result = json_path_match (path, json_parser_get_root (parser));
177 * <para>The <emphasis>result</emphasis> #JsonNode will contain an array
178 * with all values of the <emphasis>author</emphasis> member of the objects
179 * in the JSON tree. If we use a #JsonGenerator to convert the #JsonNode
180 * to a string and print it:</para>
182 * JsonGenerator *generator = json_generator_new ();
184 * json_generator_set_pretty (generator, TRUE);
185 * json_generator_set_root (generator, result);
186 * str = json_generator_to_data (generator, NULL);
187 * g_print ("Results: %s\n", str);
189 * <para>The output will be:</para>
190 * <programlisting><![CDATA[
200 * #JsonPath is available since JSON-GLib 0.14
209 #include <glib/gi18n-lib.h>
211 #include "json-path.h"
213 #include "json-debug.h"
214 #include "json-types-private.h"
/* Kinds of step a compiled JSONPath expression is decomposed into.
 * NOTE(review): the opening and closing lines of this enumeration, and the
 * JSON_PATH_NODE_ROOT enumerator used elsewhere in this file, are not
 * visible in this excerpt. */
/* a named object member, compiled from `.name` or `['name']` */
218 JSON_PATH_NODE_CHILD_MEMBER,
/* a single array element, compiled from `[N]` */
219 JSON_PATH_NODE_CHILD_ELEMENT,
/* the `..` operator */
220 JSON_PATH_NODE_RECURSIVE_DESCENT,
/* every member of an object, compiled from `.*` or `['*']` */
221 JSON_PATH_NODE_WILDCARD_MEMBER,
/* every element of an array, compiled from `[*]` */
222 JSON_PATH_NODE_WILDCARD_ELEMENT,
/* a comma separated list of array indices, e.g. `[0,1]` */
223 JSON_PATH_NODE_ELEMENT_SET,
/* an array slice, `[start:end:step]` */
224 JSON_PATH_NODE_ELEMENT_SLICE
/* Forward declaration of the compiled path step structure. */
227 typedef struct _PathNode PathNode;
/* Instance structure of JsonPath.
 * NOTE(review): the `struct _JsonPath` line and the declaration of the
 * `nodes` list (accessed as path->nodes elsewhere in this file) are not
 * visible in this excerpt. */
/* GObject instance header; JsonPath is registered as a G_TYPE_OBJECT subtype */
231 GObject parent_instance;
233 /* the compiled path: the list of PathNode stored by json_path_compile() */
/* set by json_path_compile() to whether the node list is non-empty;
 * json_path_match() refuses to run while it is unset */
236 guint is_compiled : 1;
/* Class structure of JsonPath; the only member visible in this excerpt is
 * the embedded parent class. */
239 struct _JsonPathClass
/* parent class structure; json_path_class_init() overrides its finalize slot */
241 GObjectClass parent_class;
/* One step of a compiled JSONPath expression.
 * NOTE(review): the struct/union framing lines and the `element_index` and
 * `member_name` member declarations (accessed as node->data.element_index
 * and node->data.member_name elsewhere in this file) are not visible in this
 * excerpt. */
/* selects which of the `data` members below is meaningful */
246 PathNodeType node_type;
249 /* JSON_PATH_NODE_CHILD_ELEMENT: the array index to select */
252 /* JSON_PATH_NODE_CHILD_MEMBER: the member name; released with g_free() by path_node_free() */
255 /* JSON_PATH_NODE_ELEMENT_SET: n_indices array indices; the array is released with g_free() by path_node_free() */
256 struct { int n_indices; int *indices; } set;
258 /* JSON_PATH_NODE_ELEMENT_SLICE: bounds and stride of the slice, as parsed from [start:end:step] */
259 struct { int start, end, step; } slice;
263 G_DEFINE_TYPE (JsonPath, json_path, G_TYPE_OBJECT)
266 path_node_free (gpointer data)
270 PathNode *node = data;
272 switch (node->node_type)
274 case JSON_PATH_NODE_CHILD_MEMBER:
275 g_free (node->data.member_name);
278 case JSON_PATH_NODE_ELEMENT_SET:
279 g_free (node->data.set.indices);
291 json_path_finalize (GObject *gobject)
293 JsonPath *self = JSON_PATH (gobject);
295 #if GLIB_CHECK_VERSION (2, 28, 0)
296 g_list_free_full (self->nodes, path_node_free);
298 g_list_foreach (self->nodes, (GFunc) path_node_free, NULL);
299 g_list_free (self->nodes);
302 G_OBJECT_CLASS (json_path_parent_class)->finalize (gobject);
306 json_path_class_init (JsonPathClass *klass)
308 G_OBJECT_CLASS (klass)->finalize = json_path_finalize;
/* Instance initializer paired with the G_DEFINE_TYPE (JsonPath, json_path, ...)
 * registration in this file.
 * NOTE(review): the return type and body are not visible in this excerpt. */
312 json_path_init (JsonPath *self)
317 json_path_error_quark (void)
319 return g_quark_from_static_string ("json-path-error");
325 * Creates a new #JsonPath instance.
327 * Once created, the #JsonPath object should be used with json_path_compile()
328 * and json_path_match().
330 * Return value: (transfer full): the newly created #JsonPath instance. Use
331 * g_object_unref() to free the allocated resources when done
338 return g_object_new (JSON_TYPE_PATH, NULL);
344 * @expression: a JSONPath expression
345 * @error: return location for a #GError, or %NULL
347 * Validates and decomposes @expression.
349 * A JSONPath expression must be compiled before calling json_path_match().
351 * Return value: %TRUE on success; on error, @error will be set with
352 * the %JSON_PATH_ERROR domain and a code from the #JsonPathError
353 * enumeration, and %FALSE will be returned
358 json_path_compile (JsonPath *path,
359 const char *expression,
362 const char *p, *end_p;
363 PathNode *root = NULL;
378 g_set_error_literal (error, JSON_PATH_ERROR,
379 JSON_PATH_ERROR_INVALID_QUERY,
380 _("Only one root node is allowed in a JSONPath expression"));
384 if (!(*(p + 1) == '.' || *(p + 1) == '['))
386 /* translators: the %c is the invalid character */
387 g_set_error (error, JSON_PATH_ERROR,
388 JSON_PATH_ERROR_INVALID_QUERY,
389 _("Root node followed by invalid character '%c'"),
394 node = g_new0 (PathNode, 1);
395 node->node_type = JSON_PATH_NODE_ROOT;
398 nodes = g_list_prepend (NULL, root);
405 PathNode *node = NULL;
407 if (*p == '.' && *(p + 1) == '.')
409 node = g_new0 (PathNode, 1);
410 node->node_type = JSON_PATH_NODE_RECURSIVE_DESCENT;
412 else if (*p == '.' && *(p + 1) == '*')
414 node = g_new0 (PathNode, 1);
415 node->node_type = JSON_PATH_NODE_WILDCARD_MEMBER;
422 while (!(*end_p == '.' || *end_p == '[' || *end_p == '\0'))
425 node = g_new0 (PathNode, 1);
426 node->node_type = JSON_PATH_NODE_CHILD_MEMBER;
427 node->data.member_name = g_strndup (p + 1, end_p - p - 1);
431 else if (*p == '[' && *(p + 1) == '\'')
433 if (*(p + 2) == '*' && *(p + 3) == '\'' && *(p + 4) == ']')
435 node = g_new0 (PathNode, 1);
436 node->node_type = JSON_PATH_NODE_WILDCARD_MEMBER;
442 node = g_new0 (PathNode, 1);
443 node->node_type = JSON_PATH_NODE_CHILD_MEMBER;
445 end_p = strchr (p + 2, '\'');
446 node->data.member_name = g_strndup (p + 2, end_p - p - 2);
451 else if (*p == '[' && *(p + 1) == '*' && *(p + 2) == ']')
453 node = g_new0 (PathNode, 1);
454 node->node_type = JSON_PATH_NODE_WILDCARD_ELEMENT;
471 /* slice with missing start */
474 int slice_end = g_ascii_strtoll (end_p + 1, (char **) &end_p, 10) * sign;
489 slice_step = g_ascii_strtoll (end_p, (char **) &end_p, 10) * sign;
493 g_set_error (error, JSON_PATH_ERROR,
494 JSON_PATH_ERROR_INVALID_QUERY,
495 _("Malformed slice expression '%*s'"),
502 node = g_new0 (PathNode, 1);
503 node->node_type = JSON_PATH_NODE_ELEMENT_SLICE;
504 node->data.slice.start = 0;
505 node->data.slice.end = slice_end;
506 node->data.slice.step = slice_step;
508 nodes = g_list_prepend (nodes, node);
513 idx = g_ascii_strtoll (end_p, (char **) &end_p, 10) * sign;
517 GArray *indices = g_array_new (FALSE, TRUE, sizeof (int));
519 g_array_append_val (indices, idx);
521 while (*end_p != ']')
533 idx = g_ascii_strtoll (end_p, (char **) &end_p, 10) * sign;
534 if (!(*end_p == ',' || *end_p == ']'))
536 g_array_unref (indices);
537 g_set_error (error, JSON_PATH_ERROR,
538 JSON_PATH_ERROR_INVALID_QUERY,
539 _("Invalid set definition '%*s'"),
545 g_array_append_val (indices, idx);
548 node = g_new0 (PathNode, 1);
549 node->node_type = JSON_PATH_NODE_ELEMENT_SET;
550 node->data.set.n_indices = indices->len;
551 node->data.set.indices = (int *) g_array_free (indices, FALSE);
552 nodes = g_list_prepend (nodes, node);
556 else if (*end_p == ':')
558 int slice_start = idx;
572 slice_end = g_ascii_strtoll (end_p, (char **) &end_p, 10) * sign;
585 slice_step = g_ascii_strtoll (end_p + 1, (char **) &end_p, 10) * sign;
590 g_set_error (error, JSON_PATH_ERROR,
591 JSON_PATH_ERROR_INVALID_QUERY,
592 _("Invalid slice definition '%*s'"),
598 node = g_new0 (PathNode, 1);
599 node->node_type = JSON_PATH_NODE_ELEMENT_SLICE;
600 node->data.slice.start = slice_start;
601 node->data.slice.end = slice_end;
602 node->data.slice.step = slice_step;
603 nodes = g_list_prepend (nodes, node);
607 else if (*end_p == ']')
609 node = g_new0 (PathNode, 1);
610 node->node_type = JSON_PATH_NODE_CHILD_ELEMENT;
611 node->data.element_index = idx;
612 nodes = g_list_prepend (nodes, node);
618 g_set_error (error, JSON_PATH_ERROR,
619 JSON_PATH_ERROR_INVALID_QUERY,
620 _("Invalid array index definition '%*s'"),
630 nodes = g_list_prepend (nodes, node);
641 nodes = g_list_reverse (nodes);
643 #ifdef JSON_ENABLE_DEBUG
644 if (_json_get_debug_flags () & JSON_DEBUG_PATH)
646 GString *buf = g_string_new (NULL);
648 for (l = nodes; l != NULL; l = l->next)
650 PathNode *cur_node = l->data;
652 switch (cur_node->node_type)
654 case JSON_PATH_NODE_ROOT:
655 g_string_append (buf, "<root");
658 case JSON_PATH_NODE_CHILD_MEMBER:
659 g_string_append_printf (buf, "<member '%s'", cur_node->data.member_name);
662 case JSON_PATH_NODE_CHILD_ELEMENT:
663 g_string_append_printf (buf, "<element '%d'", cur_node->data.element_index);
666 case JSON_PATH_NODE_RECURSIVE_DESCENT:
667 g_string_append (buf, "<recursive descent");
670 case JSON_PATH_NODE_WILDCARD_MEMBER:
671 g_string_append (buf, "<wildcard member");
674 case JSON_PATH_NODE_WILDCARD_ELEMENT:
675 g_string_append (buf, "<wildcard element");
678 case JSON_PATH_NODE_ELEMENT_SET:
682 g_string_append (buf, "<element set ");
683 for (i = 0; i < cur_node->data.set.n_indices - 1; i++)
684 g_string_append_printf (buf, "'%d', ", cur_node->data.set.indices[i]);
686 g_string_append_printf (buf, "'%d'", cur_node->data.set.indices[i]);
690 case JSON_PATH_NODE_ELEMENT_SLICE:
691 g_string_append_printf (buf, "<slice start '%d', end '%d', step '%d'",
692 cur_node->data.slice.start,
693 cur_node->data.slice.end,
694 cur_node->data.slice.step);
698 g_string_append (buf, "<unknown node");
703 g_string_append (buf, ">, ");
705 g_string_append (buf, ">");
708 g_message ("[PATH] " G_STRLOC ": expression '%s' => '%s'", expression, buf->str);
709 g_string_free (buf, TRUE);
711 #endif /* JSON_ENABLE_DEBUG */
713 if (path->nodes != NULL)
715 #if GLIB_CHECK_VERSION (2, 28, 0)
716 g_list_free_full (path->nodes, path_node_free);
718 g_list_foreach (path->nodes, (GFunc) path_node_free, NULL);
719 g_list_free (path->nodes);
724 path->is_compiled = (path->nodes != NULL);
726 return path->nodes != NULL;
729 #if GLIB_CHECK_VERSION (2, 28, 0)
730 g_list_free_full (nodes, path_node_free);
732 g_list_foreach (nodes, (GFunc) path_node_free, NULL);
740 walk_path_node (GList *path,
744 PathNode *node = path->data;
746 switch (node->node_type)
748 case JSON_PATH_NODE_ROOT:
749 walk_path_node (path->next, root, results);
752 case JSON_PATH_NODE_CHILD_MEMBER:
753 if (JSON_NODE_HOLDS_OBJECT (root))
755 JsonObject *object = json_node_get_object (root);
757 if (json_object_has_member (object, node->data.member_name))
759 JsonNode *member = json_object_get_member (object, node->data.member_name);
761 if (path->next == NULL)
763 JSON_NOTE (PATH, "end of path at member '%s'", node->data.member_name);
764 json_array_add_element (results, json_node_copy (member));
767 walk_path_node (path->next, member, results);
772 case JSON_PATH_NODE_CHILD_ELEMENT:
773 if (JSON_NODE_HOLDS_ARRAY (root))
775 JsonArray *array = json_node_get_array (root);
777 if (json_array_get_length (array) >= node->data.element_index)
779 JsonNode *element = json_array_get_element (array, node->data.element_index);
781 if (path->next == NULL)
783 JSON_NOTE (PATH, "end of path at element '%d'", node->data.element_index);
784 json_array_add_element (results, json_node_copy (element));
787 walk_path_node (path->next, element, results);
792 case JSON_PATH_NODE_RECURSIVE_DESCENT:
794 PathNode *tmp = path->next->data;
796 switch (json_node_get_node_type (root))
798 case JSON_NODE_OBJECT:
800 JsonObject *object = json_node_get_object (root);
803 members = json_object_get_members (object);
804 for (l = members; l != NULL; l = l->next)
806 JsonNode *m = json_object_get_member (object, l->data);
808 if (tmp->node_type == JSON_PATH_NODE_CHILD_MEMBER &&
809 strcmp (tmp->data.member_name, l->data) == 0)
811 JSON_NOTE (PATH, "entering '%s'", tmp->data.member_name);
812 walk_path_node (path->next, root, results);
816 JSON_NOTE (PATH, "recursing into '%s'", (char *) l->data);
817 walk_path_node (path, m, results);
820 g_list_free (members);
824 case JSON_NODE_ARRAY:
826 JsonArray *array = json_node_get_array (root);
830 members = json_array_get_elements (array);
831 for (l = members, i = 0; l != NULL; l = l->next, i += 1)
833 JsonNode *m = l->data;
835 if (tmp->node_type == JSON_PATH_NODE_CHILD_ELEMENT &&
836 tmp->data.element_index == i)
838 JSON_NOTE (PATH, "entering '%d'", tmp->data.element_index);
839 walk_path_node (path->next, root, results);
843 JSON_NOTE (PATH, "recursing into '%d'", i);
844 walk_path_node (path, m, results);
847 g_list_free (members);
857 case JSON_PATH_NODE_WILDCARD_MEMBER:
858 if (JSON_NODE_HOLDS_OBJECT (root))
860 JsonObject *object = json_node_get_object (root);
863 members = json_object_get_members (object);
864 for (l = members; l != NULL; l = l->next)
866 JsonNode *member = json_object_get_member (object, l->data);
868 if (path->next != NULL)
869 walk_path_node (path->next, member, results);
872 JSON_NOTE (PATH, "glob match member '%s'", (char *) l->data);
873 json_array_add_element (results, json_node_copy (root));
876 g_list_free (members);
879 json_array_add_element (results, json_node_copy (root));
882 case JSON_PATH_NODE_WILDCARD_ELEMENT:
883 if (JSON_NODE_HOLDS_ARRAY (root))
885 JsonArray *array = json_node_get_array (root);
889 elements = json_array_get_elements (array);
890 for (l = elements, i = 0; l != NULL; l = l->next, i += 1)
892 JsonNode *element = l->data;
894 if (path->next != NULL)
895 walk_path_node (path->next, element, results);
898 JSON_NOTE (PATH, "glob match element '%d'", i);
899 json_array_add_element (results, json_node_copy (root));
902 g_list_free (elements);
905 json_array_add_element (results, json_node_copy (root));
908 case JSON_PATH_NODE_ELEMENT_SET:
909 if (JSON_NODE_HOLDS_ARRAY (root))
911 JsonArray *array = json_node_get_array (root);
914 for (i = 0; i < node->data.set.n_indices; i += 1)
916 int idx = node->data.set.indices[i];
917 JsonNode *element = json_array_get_element (array, idx);
919 if (path->next != NULL)
920 walk_path_node (path->next, element, results);
923 JSON_NOTE (PATH, "set element '%d'", idx);
924 json_array_add_element (results, json_node_copy (element));
930 case JSON_PATH_NODE_ELEMENT_SLICE:
931 if (JSON_NODE_HOLDS_ARRAY (root))
933 JsonArray *array = json_node_get_array (root);
936 if (node->data.slice.start < 0)
938 start = json_array_get_length (array)
939 + node->data.slice.start;
941 end = json_array_get_length (array)
942 + node->data.slice.end;
946 start = node->data.slice.start;
947 end = node->data.slice.end;
950 for (i = start; i < end; i += node->data.slice.step)
952 JsonNode *element = json_array_get_element (array, i);
954 if (path->next != NULL)
955 walk_path_node (path->next, element, results);
958 JSON_NOTE (PATH, "slice element '%d'", i);
959 json_array_add_element (results, json_node_copy (element));
972 * @path: a compiled #JsonPath
975 * Matches the JSON tree pointed by @root using the expression compiled
976 * into the #JsonPath.
978 * The matching #JsonNode<!-- -->s will be copied into a #JsonArray and
979 * returned wrapped in a #JsonNode.
981 * Return value: (transfer full): a newly-created #JsonNode of type
982 * %JSON_NODE_ARRAY containing an array of matching #JsonNode<!-- -->s.
983 * Use json_node_free() when done
988 json_path_match (JsonPath *path,
994 g_return_val_if_fail (JSON_IS_PATH (path), NULL);
995 g_return_val_if_fail (path->is_compiled, NULL);
996 g_return_val_if_fail (root != NULL, NULL);
998 results = json_array_new ();
1000 walk_path_node (path->nodes, root, results);
1002 retval = json_node_new (JSON_NODE_ARRAY);
1003 json_node_take_array (retval, results);
1010 * @expression: a JSONPath expression
1011 * @root: the root of a JSON tree
1012 * @error: return location for a #GError, or %NULL
1014 * Queries a JSON tree using a JSONPath expression.
1016 * This function is a simple wrapper around json_path_new(),
1017 * json_path_compile() and json_path_match(). It implicitly
1018 * creates a #JsonPath instance, compiles @expression and
1019 * matches it against the JSON tree pointed by @root.
1021 * Return value: (transfer full): a newly-created #JsonNode of type
1022 * %JSON_NODE_ARRAY containing an array of matching #JsonNode<!-- -->s.
1023 * Use json_node_free() when done
1028 json_path_query (const char *expression,
1032 JsonPath *path = json_path_new ();
1035 if (!json_path_compile (path, expression, error))
1037 g_object_unref (path);
1041 retval = json_path_match (path, root);
1043 g_object_unref (path);