1 /* json-path.c - JSONPath implementation
3 * This file is part of JSON-GLib
4 * Copyright © 2011 Intel Corp.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
20 * Emmanuele Bassi <ebassi@linux.intel.com>
26 * @short_description: JSONPath implementation
28 * #JsonPath is a simple class implementing the JSONPath syntax for extracting
29 * data out of a JSON tree. While the semantics of the JSONPath expressions are
30 * heavily borrowed from the XPath specification for XML, the syntax follows the
31 * ECMAScript origins of JSON.
33 * Once a #JsonPath instance has been created, it has to compile a JSONPath
34 * expression using json_path_compile() before being able to match it to a
35 * JSON tree; the same #JsonPath instance can be used to match multiple JSON
36 * trees. It is also possible to compile a new JSONPath expression using the
37 * same #JsonPath instance; the previous expression will be discarded only if
38 * the compilation of the new expression is successful.
40 * The simple convenience function json_path_query() can be used for one-off
43 * ## Syntax of the JSONPath expressions ##
45 * A JSONPath expression is composed of path indices and operators.
46 * Each path index can either be a member name or an element index inside
47 * a JSON tree. A JSONPath expression must start with the '$' operator; each
48 * path index is separated using either the dot notation or the bracket
53 * $.store.book[0].title
56 * $['store']['book'][0]['title']
59 * The available operators are:
62 * The '$' character represents the root node of the JSON tree, and
63 * matches the entire document.
65 * * Child nodes can either be matched using '.' or '[]'. For instance,
66 * both `$.store.book` and `$['store']['book']` match the contents of
67 * the book member of the store object.
69 * * Child nodes can be reached without specifying the whole tree structure
70 * through the recursive descent operator, or '..'. For instance,
71 * `$..author` matches all author members in every object.
73 * * Child nodes can be grouped through the wildcard operator, or '*'. For
74 * instance, `$.store.book[*].author` matches all author members of any
75 * object element contained in the book array of the store object.
77 * * Element nodes can be accessed using their index (starting from zero)
78 * in the subscript operator '[]'. For instance, `$.store.book[0]` matches
79 * the first element of the book array of the store object.
81 * * Subsets of element nodes can be accessed using the set notation
82 * operator '[i,j,...]'. For instance, `$.store.book[0,2]` matches the
83 * first and third elements of the book array of the store
86 * * Slices of element nodes can be accessed using the slice notation
87 * operator '[start:end:step]'. If start is omitted, the starting index
88 * of the slice is implied to be zero; if end is omitted, the ending index
89 * of the slice is implied to be the length of the array; if step is
90 * omitted, the step of the slice is implied to be 1. For instance,
91 * `$.store.book[:2]` matches the first two elements of the book array
92 * of the store object.
94 * More information about JSONPath is available on Stefan Gössner's
95 * [JSONPath website](http://goessner.net/articles/JsonPath/).
97 * ## Example of JSONPath matches
98 * The following example shows some of the results of using #JsonPath
99 * on a JSON tree. We use the following JSON description of a bookstore:
103 * { "category": "reference", "author": "Nigel Rees",
104 * "title": "Sayings of the Century", "price": "8.95" },
105 * { "category": "fiction", "author": "Evelyn Waugh",
106 * "title": "Sword of Honour", "price": "12.99" },
107 * { "category": "fiction", "author": "Herman Melville",
108 * "title": "Moby Dick", "isbn": "0-553-21311-3",
110 * { "category": "fiction", "author": "J. R. R. Tolkien",
111 * "title": "The Lord of the Rings", "isbn": "0-395-19395-8",
114 * "bicycle": { "color": "red", "price": "19.95" }
119 * We can parse the JSON using #JsonParser:
121 * |[<!-- language="C" -->
122 * JsonParser *parser = json_parser_new ();
123 * json_parser_load_from_data (parser, json_data, -1, NULL);
126 * If we run the following code:
128 * |[<!-- language="C" -->
130 * JsonPath *path = json_path_new ();
131 * json_path_compile (path, "$.store..author", NULL);
132 * result = json_path_match (path, json_parser_get_root (parser));
135 * The result #JsonNode will contain an array with all values of the
136 * author member of the objects in the JSON tree. If we use a
137 * #JsonGenerator to convert the #JsonNode to a string and print it:
139 * |[<!-- language="C" -->
140 * JsonGenerator *generator = json_generator_new ();
141 * json_generator_set_root (generator, result);
142 * char *str = json_generator_to_data (generator, NULL);
143 * g_print ("Results: %s\n", str);
146 * The output will be:
149 * ["Nigel Rees","Evelyn Waugh","Herman Melville","J. R. R. Tolkien"]
152 * #JsonPath is available since JSON-GLib 0.14
159 #include <glib/gi18n-lib.h>
161 #include "json-path.h"
163 #include "json-debug.h"
164 #include "json-types-private.h"
/* produced by '.name' and by ['name'] */
168 JSON_PATH_NODE_CHILD_MEMBER,
/* produced by a single bracketed index, e.g. [0] */
169 JSON_PATH_NODE_CHILD_ELEMENT,
/* produced by '..' */
170 JSON_PATH_NODE_RECURSIVE_DESCENT,
/* produced by '.*' and by ['*'] */
171 JSON_PATH_NODE_WILDCARD_MEMBER,
/* produced by [*] */
172 JSON_PATH_NODE_WILDCARD_ELEMENT,
/* produced by a comma-separated list of indices, e.g. [0,2] */
173 JSON_PATH_NODE_ELEMENT_SET,
/* produced by the [start:end:step] notation */
174 JSON_PATH_NODE_ELEMENT_SLICE
/* Forward declaration so PathNode can be named before its struct body. */
177 typedef struct _PathNode PathNode;
/* Instance structure of JsonPath; the GObject base comes first. */
181 GObject parent_instance;
183 /* the compiled path */
/* set by json_path_compile(); json_path_match() refuses to run while unset */
186 guint is_compiled : 1;
/* Class structure of JsonPath. */
189 struct _JsonPathClass
191 GObjectClass parent_class;
/* One step of a compiled expression: node_type selects which of the payloads
 * below is meaningful.
 */
196 PathNodeType node_type;
199 /* JSON_PATH_NODE_CHILD_ELEMENT: index of the array element to select */
202 /* JSON_PATH_NODE_CHILD_MEMBER: member name, allocated by the compiler and
 * released by path_node_free() */
205 /* JSON_PATH_NODE_ELEMENT_SET: n_indices entries stored in indices; the
 * array is released by path_node_free() */
206 struct { int n_indices; int *indices; } set;
208 /* JSON_PATH_NODE_ELEMENT_SLICE: bounds and increment used by the slice
 * loop in walk_path_node() */
209 struct { int start, end, step; } slice;
/* Error domain behind JSON_PATH_ERROR (presumably exposed as
 * json_path_error_quark() -- see the G_DEFINE_QUARK documentation). */
213 G_DEFINE_QUARK (json-path-error-quark, json_path_error)
/* Registers JsonPath as a G_TYPE_OBJECT subclass; the json_path_class_init(),
 * json_path_init() and json_path_parent_class names used in this file follow
 * from the json_path prefix given here. */
215 G_DEFINE_TYPE (JsonPath, json_path, G_TYPE_OBJECT)
/* Destroy callback for PathNode list elements; it is handed to
 * g_list_free_full() wherever a node list is discarded.
 *
 * @data: the PathNode to release
 */
218 path_node_free (gpointer data)
222 PathNode *node = data;
/* only two node types carry heap-allocated payloads */
224 switch (node->node_type)
226 case JSON_PATH_NODE_CHILD_MEMBER:
/* the member name was duplicated when the node was compiled */
227 g_free (node->data.member_name);
230 case JSON_PATH_NODE_ELEMENT_SET:
/* the index array was taken over from a GArray when the node was compiled */
231 g_free (node->data.set.indices);
/* GObject finalize handler: releases the compiled node list, then chains up
 * to the parent class.
 *
 * @gobject: the JsonPath instance being finalized
 */
243 json_path_finalize (GObject *gobject)
245 JsonPath *self = JSON_PATH (gobject);
/* frees every PathNode together with the list links */
247 g_list_free_full (self->nodes, path_node_free);
/* chain up last, after this instance's own data is gone */
249 G_OBJECT_CLASS (json_path_parent_class)->finalize (gobject);
/* Class initializer: installs json_path_finalize() as the finalize handler.
 *
 * @klass: the JsonPath class structure being initialized
 */
253 json_path_class_init (JsonPathClass *klass)
255 G_OBJECT_CLASS (klass)->finalize = json_path_finalize;
/* Instance initializer matching the json_path prefix passed to G_DEFINE_TYPE.
 *
 * @self: the JsonPath instance being initialized
 */
259 json_path_init (JsonPath *self)
266 * Creates a new #JsonPath instance.
268 * Once created, the #JsonPath object should be used with json_path_compile()
269 * and json_path_match().
271 * Return value: (transfer full): the newly created #JsonPath instance. Use
272 * g_object_unref() to free the allocated resources when done
/* plain instantiation: no construction properties are passed */
279 return g_object_new (JSON_TYPE_PATH, NULL);
282 #ifdef JSON_ENABLE_DEBUG
283 /* used as the function for a g_list_foreach() on a list of PathNode; needs
284 * a GString as the payload to build the output string
/* Appends a "<kind ...>" description of one PathNode to a GString.
 *
 * @data: the PathNode to describe
 * @user_data: the GString receiving the text
 */
287 json_path_foreach_print (gpointer data,
290 PathNode *cur_node = data;
291 GString *buf = user_data;
/* each case writes the opening part; the closing ">" is appended once at
 * the end for every node type */
293 switch (cur_node->node_type)
295 case JSON_PATH_NODE_ROOT:
296 g_string_append (buf, "<root");
299 case JSON_PATH_NODE_CHILD_MEMBER:
300 g_string_append_printf (buf, "<member '%s'", cur_node->data.member_name);
303 case JSON_PATH_NODE_CHILD_ELEMENT:
304 g_string_append_printf (buf, "<element '%d'", cur_node->data.element_index);
307 case JSON_PATH_NODE_RECURSIVE_DESCENT:
308 g_string_append (buf, "<recursive descent");
311 case JSON_PATH_NODE_WILDCARD_MEMBER:
312 g_string_append (buf, "<wildcard member");
315 case JSON_PATH_NODE_WILDCARD_ELEMENT:
316 g_string_append (buf, "<wildcard element");
319 case JSON_PATH_NODE_ELEMENT_SET:
323 g_string_append (buf, "<element set ");
/* every index but the last is followed by a separator */
324 for (i = 0; i < cur_node->data.set.n_indices - 1; i++)
325 g_string_append_printf (buf, "'%d', ", cur_node->data.set.indices[i]);
/* i now designates the last index, which is printed without a separator;
 * this relies on the set holding at least one index */
327 g_string_append_printf (buf, "'%d'", cur_node->data.set.indices[i]);
331 case JSON_PATH_NODE_ELEMENT_SLICE:
332 g_string_append_printf (buf, "<slice start '%d', end '%d', step '%d'",
333 cur_node->data.slice.start,
334 cur_node->data.slice.end,
335 cur_node->data.slice.step);
/* fallback text for a node type not handled above */
339 g_string_append (buf, "<unknown node");
343 g_string_append (buf, ">");
345 #endif /* JSON_ENABLE_DEBUG */
350 * @expression: a JSONPath expression
351 * @error: return location for a #GError, or %NULL
353 * Validates and decomposes @expression.
355 * A JSONPath expression must be compiled before calling json_path_match().
357 * Return value: %TRUE on success; on error, @error will be set with
358 * the %JSON_PATH_ERROR domain and a code from the #JsonPathError
359 * enumeration, and %FALSE will be returned
/* Parses @expression into a list of PathNode and stores that list on @path,
 * releasing the list that was stored there before.
 *
 * @path: the JsonPath receiving the compiled nodes
 * @expression: the JSONPath expression to parse; must not be NULL
 *
 * Parse errors are reported as JSON_PATH_ERROR_INVALID_QUERY in the
 * JSON_PATH_ERROR domain. The final return value is TRUE only when the
 * resulting node list is not empty.
 */
364 json_path_compile (JsonPath *path,
365 const char *expression,
/* p is the current position in the expression; end_p is a look-ahead cursor
 * used to delimit names and numbers */
368 const char *p, *end_p;
369 PathNode *root = NULL;
/* a NULL expression is a programmer error */
372 g_return_val_if_fail (expression != NULL, FALSE);
/* a second '$' is rejected */
386 g_set_error_literal (error, JSON_PATH_ERROR,
387 JSON_PATH_ERROR_INVALID_QUERY,
388 _("Only one root node is allowed in a JSONPath expression"));
/* '$' may only be followed by '.', '[' or the end of the expression */
392 if (!(*(p + 1) == '.' || *(p + 1) == '[' || *(p + 1) == '\0'))
394 g_set_error (error, JSON_PATH_ERROR,
395 JSON_PATH_ERROR_INVALID_QUERY,
396 /* translators: the %c is the invalid character */
397 _("Root node followed by invalid character '%c'"),
402 node = g_new0 (PathNode, 1);
403 node->node_type = JSON_PATH_NODE_ROOT;
/* NOTE(review): the list is restarted from NULL here; if '$' can be reached
 * after other nodes were already collected, those nodes would no longer be
 * reachable from `nodes` -- confirm */
406 nodes = g_list_prepend (NULL, root);
413 PathNode *node = NULL;
/* '..' : recursive descent */
415 if (*p == '.' && *(p + 1) == '.')
417 node = g_new0 (PathNode, 1);
418 node->node_type = JSON_PATH_NODE_RECURSIVE_DESCENT;
/* '.*' : wildcard over the members of an object */
420 else if (*p == '.' && *(p + 1) == '*')
422 node = g_new0 (PathNode, 1);
423 node->node_type = JSON_PATH_NODE_WILDCARD_MEMBER;
/* '.name' : the name runs up to the next '.', '[' or the end of the string */
430 while (!(*end_p == '.' || *end_p == '[' || *end_p == '\0'))
435 g_set_error_literal (error, JSON_PATH_ERROR,
436 JSON_PATH_ERROR_INVALID_QUERY,
437 _("Missing member name or wildcard after . character"));
441 node = g_new0 (PathNode, 1);
442 node->node_type = JSON_PATH_NODE_CHILD_MEMBER;
/* copy the characters between the '.' and end_p */
443 node->data.member_name = g_strndup (p + 1, end_p - p - 1);
/* bracket notation with a quoted name */
447 else if (*p == '[' && *(p + 1) == '\'')
/* ['*'] is the bracket spelling of the member wildcard */
449 if (*(p + 2) == '*' && *(p + 3) == '\'' && *(p + 4) == ']')
451 node = g_new0 (PathNode, 1);
452 node->node_type = JSON_PATH_NODE_WILDCARD_MEMBER;
458 node = g_new0 (PathNode, 1);
459 node->node_type = JSON_PATH_NODE_CHILD_MEMBER;
/* NOTE(review): strchr() returns NULL when the closing quote is missing, and
 * the next line uses end_p without a check -- confirm a guard exists */
461 end_p = strchr (p + 2, '\'');
/* copy the characters between the two quotes */
462 node->data.member_name = g_strndup (p + 2, end_p - p - 2);
/* [*] : wildcard over the elements of an array */
467 else if (*p == '[' && *(p + 1) == '*' && *(p + 2) == ']')
469 node = g_new0 (PathNode, 1);
470 node->node_type = JSON_PATH_NODE_WILDCARD_ELEMENT;
487 /* slice with missing start */
/* the end bound follows the ':'; g_ascii_strtoll() also moves end_p past the
 * digits it consumed.
 * NOTE(review): the parsed value is narrowed to int, here and in the other
 * g_ascii_strtoll() calls below -- confirm out-of-range input is acceptable */
490 int slice_end = g_ascii_strtoll (end_p + 1, (char **) &end_p, 10) * sign;
/* step value of the slice */
505 slice_step = g_ascii_strtoll (end_p, (char **) &end_p, 10) * sign;
/* NOTE(review): in a printf-style format '%*s' sets a minimum field width,
 * not a maximum; if the intent is to print only part of the expression,
 * '%.*s' is presumably what is wanted (same for the three similar messages
 * below) -- confirm against the arguments */
509 g_set_error (error, JSON_PATH_ERROR,
510 JSON_PATH_ERROR_INVALID_QUERY,
511 _("Malformed slice expression '%*s'"),
518 node = g_new0 (PathNode, 1);
519 node->node_type = JSON_PATH_NODE_ELEMENT_SLICE;
/* an omitted start means the slice begins at index 0 */
520 node->data.slice.start = 0;
521 node->data.slice.end = slice_end;
522 node->data.slice.step = slice_step;
524 nodes = g_list_prepend (nodes, node);
/* first (or only) number inside the brackets */
529 idx = g_ascii_strtoll (end_p, (char **) &end_p, 10) * sign;
/* element set: the indices are gathered in a GArray seeded with the first */
533 GArray *indices = g_array_new (FALSE, TRUE, sizeof (int));
535 g_array_append_val (indices, idx);
/* keep reading indices until the closing bracket */
537 while (*end_p != ']')
549 idx = g_ascii_strtoll (end_p, (char **) &end_p, 10) * sign;
/* every index has to be followed by ',' or ']' */
550 if (!(*end_p == ',' || *end_p == ']'))
/* drop the partially filled array before reporting the error */
552 g_array_unref (indices);
553 g_set_error (error, JSON_PATH_ERROR,
554 JSON_PATH_ERROR_INVALID_QUERY,
555 _("Invalid set definition '%*s'"),
561 g_array_append_val (indices, idx);
564 node = g_new0 (PathNode, 1);
565 node->node_type = JSON_PATH_NODE_ELEMENT_SET;
/* the length is read first: the next line disposes of the GArray wrapper
 * and keeps only its element storage, which the node then owns */
566 node->data.set.n_indices = indices->len;
567 node->data.set.indices = (int *) g_array_free (indices, FALSE);
568 nodes = g_list_prepend (nodes, node);
/* ':' after the first number: a slice whose start is that number */
572 else if (*end_p == ':')
574 int slice_start = idx;
588 slice_end = g_ascii_strtoll (end_p, (char **) &end_p, 10) * sign;
601 slice_step = g_ascii_strtoll (end_p + 1, (char **) &end_p, 10) * sign;
606 g_set_error (error, JSON_PATH_ERROR,
607 JSON_PATH_ERROR_INVALID_QUERY,
608 _("Invalid slice definition '%*s'"),
614 node = g_new0 (PathNode, 1);
615 node->node_type = JSON_PATH_NODE_ELEMENT_SLICE;
616 node->data.slice.start = slice_start;
617 node->data.slice.end = slice_end;
618 node->data.slice.step = slice_step;
619 nodes = g_list_prepend (nodes, node);
/* ']' right after the number: a single element index */
623 else if (*end_p == ']')
625 node = g_new0 (PathNode, 1);
626 node->node_type = JSON_PATH_NODE_CHILD_ELEMENT;
627 node->data.element_index = idx;
628 nodes = g_list_prepend (nodes, node);
/* anything else after the number is an error */
634 g_set_error (error, JSON_PATH_ERROR,
635 JSON_PATH_ERROR_INVALID_QUERY,
636 _("Invalid array index definition '%*s'"),
646 nodes = g_list_prepend (nodes, node);
653 g_set_error(error, JSON_PATH_ERROR,
654 JSON_PATH_ERROR_INVALID_QUERY,
655 _("Invalid first character '%c'"),
/* nodes were prepended while parsing; put them back in expression order */
665 nodes = g_list_reverse (nodes);
667 #ifdef JSON_ENABLE_DEBUG
/* debug builds: log the compiled node list next to the source expression */
668 if (JSON_HAS_DEBUG (PATH))
670 GString *buf = g_string_new (NULL);
672 g_list_foreach (nodes, json_path_foreach_print, buf);
674 g_message ("[PATH] " G_STRLOC ": expression '%s' => '%s'", expression, buf->str);
675 g_string_free (buf, TRUE);
677 #endif /* JSON_ENABLE_DEBUG */
/* release the node list held by the path so far */
679 g_list_free_full (path->nodes, path_node_free);
/* NOTE(review): an expression that yields no node at all makes this return
 * FALSE, seemingly after the old list was released and without an error
 * being set -- confirm */
682 path->is_compiled = (path->nodes != NULL);
684 return path->nodes != NULL;
/* discard the partially built list */
687 g_list_free_full (nodes, path_node_free);
/* Evaluates the compiled path from the list link @path onwards against the
 * JSON node `root`, appending a copy of every matching node to `results`.
 * The function calls itself for each following path node and, for recursive
 * descent, for each child of `root`.
 *
 * @path: list link whose data is the PathNode to apply to `root`
 */
693 walk_path_node (GList *path,
697 PathNode *node = path->data;
699 switch (node->node_type)
701 case JSON_PATH_NODE_ROOT:
/* '$' followed by more path nodes: continue from the same JSON node */
702 if (path->next != NULL)
703 walk_path_node (path->next, root, results);
/* copy of the whole tree, presumably for a path that is just '$' */
705 json_array_add_element (results, json_node_copy (root));
708 case JSON_PATH_NODE_CHILD_MEMBER:
/* member lookup only applies to objects; other node types match nothing */
709 if (JSON_NODE_HOLDS_OBJECT (root))
711 JsonObject *object = json_node_get_object (root);
713 if (json_object_has_member (object, node->data.member_name))
715 JsonNode *member = json_object_get_member (object, node->data.member_name);
/* last path node: the member itself is a result */
717 if (path->next == NULL)
719 JSON_NOTE (PATH, "end of path at member '%s'", node->data.member_name);
720 json_array_add_element (results, json_node_copy (member));
723 walk_path_node (path->next, member, results);
728 case JSON_PATH_NODE_CHILD_ELEMENT:
729 if (JSON_NODE_HOLDS_ARRAY (root))
731 JsonArray *array = json_node_get_array (root);
/* NOTE(review): '>=' lets an index equal to the array length through, which
 * is one past the last element; the comparison also mixes the array length
 * with a signed int index. Presumably 'length > index' with a non-negative
 * index is intended -- confirm against json_array_get_element() */
733 if (json_array_get_length (array) >= node->data.element_index)
735 JsonNode *element = json_array_get_element (array, node->data.element_index);
737 if (path->next == NULL)
739 JSON_NOTE (PATH, "end of path at element '%d'", node->data.element_index);
740 json_array_add_element (results, json_node_copy (element));
743 walk_path_node (path->next, element, results);
748 case JSON_PATH_NODE_RECURSIVE_DESCENT:
/* tmp is the path node following '..'.
 * NOTE(review): path->next is dereferenced without a NULL check; an
 * expression ending in '..' would presumably leave it NULL -- confirm the
 * compiler rejects that */
750 PathNode *tmp = path->next->data;
752 switch (json_node_get_node_type (root))
754 case JSON_NODE_OBJECT:
756 JsonObject *object = json_node_get_object (root);
759 members = json_object_get_members (object);
760 for (l = members; l != NULL; l = l->next)
762 JsonNode *m = json_object_get_member (object, l->data);
/* this member is the one named by the node after '..': hand over to that
 * node, still on `root`, so that it performs the lookup itself */
764 if (tmp->node_type == JSON_PATH_NODE_CHILD_MEMBER &&
765 strcmp (tmp->data.member_name, l->data) == 0)
767 JSON_NOTE (PATH, "entering '%s'", tmp->data.member_name);
768 walk_path_node (path->next, root, results);
/* keep applying the same '..' node one level down, inside the member */
772 JSON_NOTE (PATH, "recursing into '%s'", (char *) l->data);
773 walk_path_node (path, m, results);
/* only the list links are freed here, not the data they point to */
776 g_list_free (members);
780 case JSON_NODE_ARRAY:
782 JsonArray *array = json_node_get_array (root);
786 members = json_array_get_elements (array);
787 for (l = members, i = 0; l != NULL; l = l->next, i += 1)
789 JsonNode *m = l->data;
/* same scheme as for objects, matching on the element position */
791 if (tmp->node_type == JSON_PATH_NODE_CHILD_ELEMENT &&
792 tmp->data.element_index == i)
794 JSON_NOTE (PATH, "entering '%d'", tmp->data.element_index);
795 walk_path_node (path->next, root, results);
799 JSON_NOTE (PATH, "recursing into '%d'", i);
800 walk_path_node (path, m, results);
803 g_list_free (members);
813 case JSON_PATH_NODE_WILDCARD_MEMBER:
814 if (JSON_NODE_HOLDS_OBJECT (root))
816 JsonObject *object = json_node_get_object (root);
819 members = json_object_get_members (object);
820 for (l = members; l != NULL; l = l->next)
822 JsonNode *member = json_object_get_member (object, l->data);
/* more path nodes follow: continue inside each member */
824 if (path->next != NULL)
825 walk_path_node (path->next, member, results);
828 JSON_NOTE (PATH, "glob match member '%s'", (char *) l->data);
829 json_array_add_element (results, json_node_copy (member));
832 g_list_free (members);
/* presumably the fallback for a root that is not an object: the root itself
 * becomes a result -- confirm */
835 json_array_add_element (results, json_node_copy (root));
838 case JSON_PATH_NODE_WILDCARD_ELEMENT:
839 if (JSON_NODE_HOLDS_ARRAY (root))
841 JsonArray *array = json_node_get_array (root);
845 elements = json_array_get_elements (array);
846 for (l = elements, i = 0; l != NULL; l = l->next, i += 1)
848 JsonNode *element = l->data;
850 if (path->next != NULL)
851 walk_path_node (path->next, element, results);
854 JSON_NOTE (PATH, "glob match element '%d'", i);
855 json_array_add_element (results, json_node_copy (element));
858 g_list_free (elements);
/* presumably the fallback for a root that is not an array -- confirm */
861 json_array_add_element (results, json_node_copy (root));
864 case JSON_PATH_NODE_ELEMENT_SET:
865 if (JSON_NODE_HOLDS_ARRAY (root))
867 JsonArray *array = json_node_get_array (root);
/* only the listed indices are visited, in the order they were written */
870 for (i = 0; i < node->data.set.n_indices; i += 1)
872 int idx = node->data.set.indices[i];
/* NOTE(review): idx is not checked against the array length (or for being
 * negative) before the lookup -- confirm how json_array_get_element() and
 * json_node_copy() behave for an out-of-range index */
873 JsonNode *element = json_array_get_element (array, idx);
875 if (path->next != NULL)
876 walk_path_node (path->next, element, results);
879 JSON_NOTE (PATH, "set element '%d'", idx);
880 json_array_add_element (results, json_node_copy (element));
886 case JSON_PATH_NODE_ELEMENT_SLICE:
887 if (JSON_NODE_HOLDS_ARRAY (root))
889 JsonArray *array = json_node_get_array (root);
/* a negative start makes both bounds relative to the end of the array */
892 if (node->data.slice.start < 0)
894 start = json_array_get_length (array)
895 + node->data.slice.start;
897 end = json_array_get_length (array)
898 + node->data.slice.end;
/* otherwise the bounds are used exactly as compiled */
902 start = node->data.slice.start;
903 end = node->data.slice.end;
/* NOTE(review): a step of 0 would keep i below end forever, and neither
 * bound is clamped to the array length -- confirm the compiler or the
 * callers rule these out */
906 for (i = start; i < end; i += node->data.slice.step)
908 JsonNode *element = json_array_get_element (array, i);
910 if (path->next != NULL)
911 walk_path_node (path->next, element, results);
914 JSON_NOTE (PATH, "slice element '%d'", i);
915 json_array_add_element (results, json_node_copy (element));
928 * @path: a compiled #JsonPath
931 * Matches the JSON tree pointed to by @root using the expression compiled
932 * into the #JsonPath.
934 * The matching #JsonNodes will be copied into a #JsonArray and
935 * returned wrapped in a #JsonNode.
937 * Return value: (transfer full): a newly-created #JsonNode of type
938 * %JSON_NODE_ARRAY containing an array of matching #JsonNodes.
939 * Use json_node_unref() when done
/* Runs the compiled expression of @path over the tree at `root` and wraps
 * the collected matches in an array node.
 *
 * @path: a JsonPath holding a compiled expression
 */
944 json_path_match (JsonPath *path,
/* argument checks: a valid, compiled JsonPath and a non-NULL root; each
 * failed check returns NULL */
950 g_return_val_if_fail (JSON_IS_PATH (path), NULL);
951 g_return_val_if_fail (path->is_compiled, NULL);
952 g_return_val_if_fail (root != NULL, NULL);
/* walk_path_node() appends a copy of each matching node to this array */
954 results = json_array_new ();
956 walk_path_node (path->nodes, root, results);
/* hand the array to a new array node; no separate release of `results`
 * follows in the lines visible here */
958 retval = json_node_new (JSON_NODE_ARRAY);
959 json_node_take_array (retval, results);
966 * @expression: a JSONPath expression
967 * @root: the root of a JSON tree
968 * @error: return location for a #GError, or %NULL
970 * Queries a JSON tree using a JSONPath expression.
972 * This function is a simple wrapper around json_path_new(),
973 * json_path_compile() and json_path_match(). It implicitly
974 * creates a #JsonPath instance, compiles @expression and
975 * matches it against the JSON tree pointed to by @root.
977 * Return value: (transfer full): a newly-created #JsonNode of type
978 * %JSON_NODE_ARRAY containing an array of matching #JsonNodes.
979 * Use json_node_unref() when done
/* One-shot helper: creates a temporary JsonPath, compiles @expression into
 * it and matches it against the tree at `root`.
 *
 * @expression: the JSONPath expression to compile
 */
984 json_path_query (const char *expression,
988 JsonPath *path = json_path_new ();
/* compilation failed: `error` was passed on to json_path_compile(); drop the
 * temporary object */
991 if (!json_path_compile (path, expression, error))
993 g_object_unref (path);
997 retval = json_path_match (path, root);
/* the temporary JsonPath is no longer needed once the match has run */
999 g_object_unref (path);