2 * pathx.c: handling path expressions
4 * Copyright (C) 2007-2015 David Lutterkort
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * Author: David Lutterkort <dlutter@redhat.com>
34 static const char *const errcodes[] = {
37 "illegal string literal",
39 "string missing ending ' or \"",
45 "internal error", /* PATHX_EINTERNAL */
46 "type error", /* PATHX_ETYPE */
47 "undefined variable", /* PATHX_ENOVAR */
48 "garbage at end of path expression", /* PATHX_EEND */
49 "no match for path expression", /* PATHX_ENOMATCH */
50 "wrong number of arguments in function call", /* PATHX_EARITY */
51 "invalid regular expression", /* PATHX_EREGEXP */
52 "too many matches", /* PATHX_EMMATCH */
53 "wrong flag for regexp" /* PATHX_EREGEXPFLAG */
57 * Path expressions are strings that use a notation modelled on XPath.
61 T_NONE = 0, /* Not a type */
89 OP_RE_MATCH, /* '=~' */
90 OP_RE_NOMATCH, /* '!~' */
111 /* This array is indexed by enum axis */
112 static const char *const axis_names[] = {
116 "descendant-or-self",
124 static const char *const axis_sep = "::";
126 /* The characters that can follow a name in a location expression (aka path)
127 * The parser will assume that name (path component) is finished when it
128 * encounters any of these characters, unless they are escaped by preceding
131 * See parse_name for the gory details
133 static const char const name_follow[] = "][|/=()!,";
135 /* Doubly linked list of location steps. Besides the information from the
136 * path expression, also contains information to iterate over a node set,
137 * in particular, the context node CTX for the step, and the current node
138 * CUR within that context.
143 char *name; /* NULL to match any name */
144 struct pred *predicates;
147 /* Initialise the root nodeset with the first step */
148 static struct tree *step_root(struct step *step, struct tree *ctx,
149 struct tree *root_ctx);
150 /* Iteration over the nodes on a step, ignoring the predicates */
151 static struct tree *step_first(struct step *step, struct tree *ctx);
152 static struct tree *step_next(struct step *step, struct tree *ctx,
155 struct pathx_symtab {
156 struct pathx_symtab *next;
163 struct nodeset *nodeset;
181 typedef uint32_t value_ind_t;
186 struct nodeset *nodeset; /* T_NODESET */
187 int number; /* T_NUMBER */
188 char *string; /* T_STRING */
189 bool boolval; /* T_BOOLEAN */
190 struct regexp *regexp; /* T_REGEXP */
198 struct { /* E_FILTER */
199 struct expr *primary;
200 struct pred *predicates;
201 struct locpath *locpath;
203 struct { /* E_BINARY */
208 value_ind_t value_ind; /* E_VALUE */
209 char *ident; /* E_VAR */
211 const struct func *func;
217 struct locpath_trace {
223 /* Internal state of the evaluator/parser */
225 pathx_errcode_t errcode;
230 const char *txt; /* Entire expression */
231 const char *pos; /* Current position within TXT during parsing */
233 struct tree *ctx; /* The current node */
237 struct tree *root_ctx; /* Root context for relative paths */
239 /* A table of all values. The table is dynamically reallocated, i.e.
240 * pointers to struct value should not be used across calls that
241 * might allocate new values
243 * value_pool[0] is always the boolean false, and value_pool[1]
244 * always the boolean true
246 struct value *value_pool;
247 value_ind_t value_pool_used;
248 value_ind_t value_pool_size;
249 /* Stack of values (as indices into value_pool), with bottom of
250 stack in values[0] */
254 /* Stack of expressions, with bottom of stack in exprs[0] */
258 /* Trace of a locpath evaluation, needed by pathx_expand_tree.
259 Generally NULL, unless a trace is needed.
261 struct locpath_trace *locpath_trace;
262 /* Symbol table for variable lookups */
263 struct pathx_symtab *symtab;
264 /* Error structure, used to communicate errors to struct augeas;
265 * we never own this structure, and therefore never free it */
269 /* We consider NULL and the empty string to be equal */
271 static inline int streqx(const char *s1, const char *s2) {
273 return (s2 == NULL || strlen(s2) == 0);
275 return strlen(s1) == 0;
276 return STREQ(s1, s2);
281 typedef void (*func_impl_t)(struct state *state, int nargs);
287 const enum type *arg_types;
291 static void func_last(struct state *state, int nargs);
292 static void func_position(struct state *state, int nargs);
293 static void func_count(struct state *state, int nargs);
294 static void func_label(struct state *state, int nargs);
295 static void func_regexp(struct state *state, int nargs);
296 static void func_regexp_flag(struct state *state, int nargs);
297 static void func_glob(struct state *state, int nargs);
298 static void func_int(struct state *state, int nargs);
300 static const enum type const arg_types_nodeset[] = { T_NODESET };
301 static const enum type const arg_types_string[] = { T_STRING };
302 static const enum type const arg_types_bool[] = { T_BOOLEAN };
303 static const enum type const arg_types_string_string[] = { T_STRING, T_STRING };
304 static const enum type const arg_types_nodeset_string[] = { T_NODESET, T_STRING };
306 static const struct func builtin_funcs[] = {
307 { .name = "last", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
309 { .name = "position", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
310 .impl = func_position },
311 { .name = "label", .arity = 0, .type = T_STRING, .arg_types = NULL,
312 .impl = func_label },
313 { .name = "count", .arity = 1, .type = T_NUMBER,
314 .arg_types = arg_types_nodeset,
315 .impl = func_count },
316 { .name = "regexp", .arity = 1, .type = T_REGEXP,
317 .arg_types = arg_types_string,
318 .impl = func_regexp },
319 { .name = "regexp", .arity = 1, .type = T_REGEXP,
320 .arg_types = arg_types_nodeset,
321 .impl = func_regexp },
322 { .name = "regexp", .arity = 2, .type = T_REGEXP,
323 .arg_types = arg_types_string_string,
324 .impl = func_regexp_flag },
325 { .name = "regexp", .arity = 2, .type = T_REGEXP,
326 .arg_types = arg_types_nodeset_string,
327 .impl = func_regexp_flag },
328 { .name = "glob", .arity = 1, .type = T_REGEXP,
329 .arg_types = arg_types_string,
331 { .name = "glob", .arity = 1, .type = T_REGEXP,
332 .arg_types = arg_types_nodeset,
334 { .name = "int", .arity = 1, .type = T_NUMBER,
335 .arg_types = arg_types_string, .impl = func_int },
336 { .name = "int", .arity = 1, .type = T_NUMBER,
337 .arg_types = arg_types_nodeset, .impl = func_int },
338 { .name = "int", .arity = 1, .type = T_NUMBER,
339 .arg_types = arg_types_bool, .impl = func_int }
342 #define RET_ON_ERROR \
343 if (state->errcode != PATHX_NOERROR) return
345 #define RET0_ON_ERROR \
346 if (state->errcode != PATHX_NOERROR) return 0
348 #define STATE_ERROR(state, err) \
350 state->errcode = err; \
351 state->file = __FILE__; \
352 state->line = __LINE__; \
355 #define HAS_ERROR(state) (state->errcode != PATHX_NOERROR)
357 #define STATE_ENOMEM STATE_ERROR(state, PATHX_ENOMEM)
359 #define ENOMEM_ON_NULL(state, v) \
362 STATE_ERROR(state, PATHX_ENOMEM); \
368 * Free the various data structures
371 static void free_expr(struct expr *expr);
373 static void free_pred(struct pred *pred) {
377 for (int i=0; i < pred->nexpr; i++) {
378 free_expr(pred->exprs[i]);
384 static void free_step(struct step *step) {
385 while (step != NULL) {
386 struct step *del = step;
389 free_pred(del->predicates);
394 static void free_locpath(struct locpath *locpath) {
397 while (locpath->steps != NULL) {
398 struct step *step = locpath->steps;
399 locpath->steps = step->next;
401 free_pred(step->predicates);
407 static void free_expr(struct expr *expr) {
412 free_expr(expr->primary);
413 free_pred(expr->predicates);
414 free_locpath(expr->locpath);
417 free_expr(expr->left);
418 free_expr(expr->right);
426 for (int i=0; i < expr->func->arity; i++)
427 free_expr(expr->args[i]);
436 static void free_nodeset(struct nodeset *ns) {
443 /* Free all objects used by VALUE, but not VALUE itself */
444 static void release_value(struct value *v) {
450 free_nodeset(v->nodeset);
459 unref(v->regexp, regexp);
466 static void free_state(struct state *state) {
470 for(int i=0; i < state->exprs_used; i++)
471 free_expr(state->exprs[i]);
474 for(int i=0; i < state->value_pool_used; i++)
475 release_value(state->value_pool + i);
476 free(state->value_pool);
481 void free_pathx(struct pathx *pathx) {
484 free_state(pathx->state);
491 static struct nodeset *make_nodeset(struct state *state) {
492 struct nodeset *result;
493 if (ALLOC(result) < 0)
498 /* Add NODE to NS if it is not in NS yet. This relies on the flag
499 * NODE->ADDED and care must be taken that NS_CLEAR_ADDED is called on NS
500 * as soon as we are done adding nodes to it.
502 static void ns_add(struct nodeset *ns, struct tree *node,
503 struct state *state) {
506 if (ns->used >= ns->size) {
507 size_t size = 2 * ns->size;
508 if (size < 10) size = 10;
509 if (REALLOC_N(ns->nodes, size) < 0)
513 ns->nodes[ns->used] = node;
518 static void ns_clear_added(struct nodeset *ns) {
519 for (int i=0; i < ns->used; i++)
520 ns->nodes[i]->added = 0;
523 static struct nodeset *
524 clone_nodeset(struct nodeset *ns, struct state *state)
526 struct nodeset *clone;
527 if (ALLOC(clone) < 0) {
531 if (ALLOC_N(clone->nodes, ns->used) < 0) {
536 clone->used = ns->used;
537 clone->size = ns->used;
538 for (int i=0; i < ns->used; i++)
539 clone->nodes[i] = ns->nodes[i];
546 static value_ind_t make_value(enum type tag, struct state *state) {
547 assert(tag != T_BOOLEAN);
549 if (state->value_pool_used >= state->value_pool_size) {
550 value_ind_t new_size = 2*state->value_pool_size;
551 if (new_size <= state->value_pool_size) {
555 if (REALLOC_N(state->value_pool, new_size) < 0) {
559 state->value_pool_size = new_size;
561 state->value_pool[state->value_pool_used].tag = tag;
562 state->value_pool[state->value_pool_used].nodeset = NULL;
563 return state->value_pool_used++;
567 static value_ind_t clone_value(struct value *v, struct state *state) {
568 value_ind_t vind = make_value(v->tag, state);
570 struct value *clone = state->value_pool + vind;
574 clone->nodeset = clone_nodeset(v->nodeset, state);
577 clone->number = v->number;
580 clone->string = strdup(v->string);
581 if (clone->string == NULL) {
587 clone->boolval = v->boolval;
590 clone->regexp = ref(v->regexp);
598 static value_ind_t pop_value_ind(struct state *state) {
599 if (state->values_used > 0) {
600 state->values_used -= 1;
601 return state->values[state->values_used];
603 STATE_ERROR(state, PATHX_EINTERNAL);
609 static struct value *pop_value(struct state *state) {
610 value_ind_t vind = pop_value_ind(state);
611 if (HAS_ERROR(state))
613 return state->value_pool + vind;
616 static void push_value(value_ind_t vind, struct state *state) {
617 if (state->values_used >= state->values_size) {
618 size_t new_size = 2*state->values_size;
619 if (new_size == 0) new_size = 8;
620 if (REALLOC_N(state->values, new_size) < 0) {
624 state->values_size = new_size;
626 state->values[state->values_used++] = vind;
629 static void push_boolean_value(int b, struct state *state) {
630 push_value(b != 0, state);
634 static struct value *expr_value(struct expr *expr, struct state *state) {
635 return state->value_pool + expr->value_ind;
638 /*************************************************************************
640 ************************************************************************/
641 static void eval_expr(struct expr *expr, struct state *state);
644 #define ensure_arity(min, max) \
645 if (nargs < min || nargs > max) { \
646 STATE_ERROR(state, PATHX_EINTERNAL); \
650 static void func_last(struct state *state, int nargs) {
652 value_ind_t vind = make_value(T_NUMBER, state);
655 state->value_pool[vind].number = state->ctx_len;
656 push_value(vind, state);
659 static void func_position(struct state *state, int nargs) {
661 value_ind_t vind = make_value(T_NUMBER, state);
664 state->value_pool[vind].number = state->ctx_pos;
665 push_value(vind, state);
668 static void func_count(struct state *state, int nargs) {
670 value_ind_t vind = make_value(T_NUMBER, state);
673 struct value *ns = pop_value(state);
674 state->value_pool[vind].number = ns->nodeset->used;
675 push_value(vind, state);
678 static void func_label(struct state *state, int nargs) {
680 value_ind_t vind = make_value(T_STRING, state);
685 if (state->ctx->label)
686 s = strdup(state->ctx->label);
693 state->value_pool[vind].string = s;
694 push_value(vind, state);
697 static void func_int(struct state *state, int nargs) {
699 value_ind_t vind = make_value(T_NUMBER, state);
703 struct value *v = pop_value(state);
704 if (v->tag == T_BOOLEAN) {
707 const char *s = NULL;
708 if (v->tag == T_STRING) {
712 if (v->nodeset->used != 1) {
713 STATE_ERROR(state, PATHX_EMMATCH);
716 s = v->nodeset->nodes[0]->value;
720 r = xstrtoint64(s, 10, &i);
722 STATE_ERROR(state, PATHX_ENUMBER);
727 state->value_pool[vind].number = i;
728 push_value(vind, state);
731 static struct regexp *
732 nodeset_as_regexp(struct info *info, struct nodeset *ns, int glob, int nocase) {
733 struct regexp *result = NULL;
734 struct regexp **rx = NULL;
737 for (int i = 0; i < ns->used; i++) {
738 if (ns->nodes[i]->value != NULL)
743 /* If the nodeset is empty, make sure we produce a regexp
744 * that never matches anything */
745 result = make_regexp_unescape(info, "[^\001-\7ff]", nocase);
747 if (ALLOC_N(rx, ns->used) < 0)
749 for (int i=0; i < ns->used; i++) {
750 if (ns->nodes[i]->value == NULL)
754 rx[i] = make_regexp_from_glob(info, ns->nodes[i]->value);
756 rx[i] = make_regexp_unescape(info, ns->nodes[i]->value, 0);
760 result = regexp_union_n(info, ns->used, rx);
765 for (int i=0; i < ns->used; i++)
766 unref(rx[i], regexp);
772 static void func_regexp_or_glob(struct state *state, int glob, int nocase) {
773 value_ind_t vind = make_value(T_REGEXP, state);
778 struct value *v = pop_value(state);
779 struct regexp *rx = NULL;
781 if (v->tag == T_STRING) {
783 rx = make_regexp_from_glob(state->error->info, v->string);
785 rx = make_regexp_unescape(state->error->info, v->string, nocase);
786 } else if (v->tag == T_NODESET) {
787 rx = nodeset_as_regexp(state->error->info, v->nodeset, glob, nocase);
797 state->value_pool[vind].regexp = rx;
798 r = regexp_compile(rx);
801 regexp_check(rx, &msg);
802 state->errmsg = strdup(msg);
803 STATE_ERROR(state, PATHX_EREGEXP);
806 push_value(vind, state);
809 static void func_regexp(struct state *state, int nargs) {
811 func_regexp_or_glob(state, 0, 0);
814 static void func_regexp_flag(struct state *state, int nargs) {
817 struct value *f = pop_value(state);
819 if (STREQ("i", f->string))
822 STATE_ERROR(state, PATHX_EREGEXPFLAG);
824 func_regexp_or_glob(state, 0, nocase);
827 static void func_glob(struct state *state, int nargs) {
829 func_regexp_or_glob(state, 1, 0);
832 static bool coerce_to_bool(struct value *v) {
835 return v->nodeset->used > 0;
841 return v->number > 0;
844 return strlen(v->string) > 0;
854 static int calc_eq_nodeset_nodeset(struct nodeset *ns1, struct nodeset *ns2,
856 for (int i1=0; i1 < ns1->used; i1++) {
857 struct tree *t1 = ns1->nodes[i1];
858 for (int i2=0; i2 < ns2->used; i2++) {
859 struct tree *t2 = ns2->nodes[i2];
861 if (!streqx(t1->value, t2->value))
864 if (streqx(t1->value, t2->value))
872 static int calc_eq_nodeset_string(struct nodeset *ns, const char *s,
874 for (int i=0; i < ns->used; i++) {
875 struct tree *t = ns->nodes[i];
877 if (!streqx(t->value, s))
880 if (streqx(t->value, s))
887 static void eval_eq(struct state *state, int neq) {
888 struct value *r = pop_value(state);
889 struct value *l = pop_value(state);
892 if (l->tag == T_NODESET && r->tag == T_NODESET) {
893 res = calc_eq_nodeset_nodeset(l->nodeset, r->nodeset, neq);
894 } else if (l->tag == T_NODESET) {
895 res = calc_eq_nodeset_string(l->nodeset, r->string, neq);
896 } else if (r->tag == T_NODESET) {
897 res = calc_eq_nodeset_string(r->nodeset, l->string, neq);
898 } else if (l->tag == T_NUMBER && r->tag == T_NUMBER) {
900 res = (l->number != r->number);
902 res = (l->number == r->number);
904 assert(l->tag == T_STRING);
905 assert(r->tag == T_STRING);
906 res = streqx(l->string, r->string);
912 push_boolean_value(res, state);
915 static void eval_arith(struct state *state, enum binary_op op) {
916 value_ind_t vind = make_value(T_NUMBER, state);
917 struct value *r = pop_value(state);
918 struct value *l = pop_value(state);
921 assert(l->tag == T_NUMBER);
922 assert(r->tag == T_NUMBER);
927 res = l->number + r->number;
928 else if (op == OP_MINUS)
929 res = l->number - r->number;
930 else if (op == OP_STAR)
931 res = l->number * r->number;
935 state->value_pool[vind].number = res;
936 push_value(vind, state);
939 static void eval_rel(struct state *state, bool greater, bool equal) {
943 /* We always check l < r or l <= r */
945 l = pop_value(state);
946 r = pop_value(state);
948 r = pop_value(state);
949 l = pop_value(state);
951 if (l->tag == T_NUMBER) {
953 res = (l->number <= r->number);
955 res = (l->number < r->number);
956 } else if (l->tag == T_STRING) {
957 int cmp = strcmp(l->string, r->string);
966 push_boolean_value(res, state);
969 static void eval_and_or(struct state *state, enum binary_op op) {
970 struct value *r = pop_value(state);
971 struct value *l = pop_value(state);
972 bool left = coerce_to_bool(l);
973 bool right = coerce_to_bool(r);
977 push_boolean_value(left && right, state);
979 push_boolean_value(left || right, state);
982 static bool eval_re_match_str(struct state *state, struct regexp *rx,
989 r = regexp_match(rx, str, strlen(str), 0, NULL);
991 STATE_ERROR(state, PATHX_EINTERNAL);
992 } else if (r == -3) {
993 /* We should never get this far; func_regexp should catch
997 return r == strlen(str);
1000 static void eval_union(struct state *state) {
1001 value_ind_t vind = make_value(T_NODESET, state);
1002 struct value *r = pop_value(state);
1003 struct value *l = pop_value(state);
1004 struct nodeset *res = NULL;
1006 assert(l->tag == T_NODESET);
1007 assert(r->tag == T_NODESET);
1011 res = clone_nodeset(l->nodeset, state);
1013 for (int i=0; i < r->nodeset->used; i++) {
1014 ns_add(res, r->nodeset->nodes[i], state);
1015 if (HAS_ERROR(state))
1018 state->value_pool[vind].nodeset = res;
1019 push_value(vind, state);
1021 ns_clear_added(res);
1024 static void eval_concat_string(struct state *state) {
1025 value_ind_t vind = make_value(T_STRING, state);
1026 struct value *r = pop_value(state);
1027 struct value *l = pop_value(state);
1032 if (ALLOC_N(res, strlen(l->string) + strlen(r->string) + 1) < 0) {
1036 strcpy(res, l->string);
1037 strcat(res, r->string);
1038 state->value_pool[vind].string = res;
1039 push_value(vind, state);
1042 static void eval_concat_regexp(struct state *state) {
1043 value_ind_t vind = make_value(T_REGEXP, state);
1044 struct value *r = pop_value(state);
1045 struct value *l = pop_value(state);
1046 struct regexp *rx = NULL;
1050 rx = regexp_concat(state->error->info, l->regexp, r->regexp);
1056 state->value_pool[vind].regexp = rx;
1057 push_value(vind, state);
1060 static void eval_re_match(struct state *state, enum binary_op op) {
1061 struct value *rx = pop_value(state);
1062 struct value *v = pop_value(state);
1064 bool result = false;
1066 if (v->tag == T_STRING) {
1067 result = eval_re_match_str(state, rx->regexp, v->string);
1069 } else if (v->tag == T_NODESET) {
1070 for (int i=0; i < v->nodeset->used && result == false; i++) {
1071 struct tree *t = v->nodeset->nodes[i];
1072 result = eval_re_match_str(state, rx->regexp, t->value);
1076 if (op == OP_RE_NOMATCH)
1078 push_boolean_value(result, state);
1081 static void eval_binary(struct expr *expr, struct state *state) {
1082 eval_expr(expr->left, state);
1083 eval_expr(expr->right, state);
1094 eval_rel(state, false, false);
1097 eval_rel(state, false, true);
1100 eval_rel(state, true, false);
1103 eval_rel(state, true, true);
1106 if (expr->type == T_NUMBER)
1107 eval_arith(state, expr->op);
1108 else if (expr->type == T_STRING)
1109 eval_concat_string(state);
1110 else if (expr->type == T_REGEXP)
1111 eval_concat_regexp(state);
1115 eval_arith(state, expr->op);
1119 eval_and_or(state, expr->op);
1126 eval_re_match(state, expr->op);
1133 static void eval_app(struct expr *expr, struct state *state) {
1134 assert(expr->tag == E_APP);
1136 for (int i=0; i < expr->func->arity; i++) {
1137 eval_expr(expr->args[i], state);
1140 expr->func->impl(state, expr->func->arity);
1143 static bool eval_pred(struct expr *expr, struct state *state) {
1144 eval_expr(expr, state);
1147 struct value *v = pop_value(state);
1152 return (state->ctx_pos == v->number);
1154 return v->nodeset->used > 0;
1161 /* Remove COUNT successive entries from NS. The first entry to remove is at
1163 static void ns_remove(struct nodeset *ns, int ind, int count) {
1166 memmove(ns->nodes + ind, ns->nodes + ind+count,
1167 sizeof(ns->nodes[0]) * (ns->used - (ind+count)));
1172 * Remove all nodes from NS for which one of PRED is false
1174 static void ns_filter(struct nodeset *ns, struct pred *predicates,
1175 struct state *state) {
1176 if (predicates == NULL)
1179 struct tree *old_ctx = state->ctx;
1180 uint old_ctx_len = state->ctx_len;
1181 uint old_ctx_pos = state->ctx_pos;
1183 for (int p=0; p < predicates->nexpr; p++) {
1184 int first_bad = -1; /* The index of the first non-matching node */
1185 state->ctx_len = ns->used;
1187 for (int i=0; i < ns->used; state->ctx_pos++) {
1188 state->ctx = ns->nodes[i];
1189 bool match = eval_pred(predicates->exprs[p], state);
1191 /* We remove non-matching nodes from NS in batches; this logic
1192 * makes sure that we only call ns_remove at the end of a run
1193 * of non-matching nodes
1196 if (first_bad >= 0) {
1197 ns_remove(ns, first_bad, i - first_bad);
1204 if (first_bad == -1)
1209 if (first_bad >= 0) {
1210 ns_remove(ns, first_bad, ns->used - first_bad);
1214 state->ctx = old_ctx;
1215 state->ctx_pos = old_ctx_pos;
1216 state->ctx_len = old_ctx_len;
1219 /* Return true if PRED is solely the predicate '[n]' as in 'foo[17]' */
1220 static bool position_pred(struct pred *pred) {
1221 return pred != NULL &&
1223 pred->exprs[0]->tag == E_VALUE &&
1224 pred->exprs[0]->type == T_NUMBER;
1227 /* Return the tree node at the position implied by STEP->PREDICATES. It is
1228 assumed and required that STEP->PREDICATES is actually a
1231 This method hand-optimizes the important case of a path expression like
1234 static struct tree *position_filter(struct nodeset *ns,
1236 struct state *state) {
1237 int value_ind = step->predicates->exprs[0]->value_ind;
1238 int number = state->value_pool[value_ind].number;
1241 for (int i=0; i < ns->used; i++) {
1242 for (struct tree *node = step_first(step, ns->nodes[i]);
1244 node = step_next(step, ns->nodes[i], node), pos++) {
1253 /* Return an array of nodesets, one for each step in the locpath.
1255 * On return, (*NS)[0] will contain state->ctx, and (*NS)[*MAXNS] will
1256 * contain the nodes that matched the entire locpath
1258 static void ns_from_locpath(struct locpath *lp, uint *maxns,
1259 struct nodeset ***ns,
1260 const struct nodeset *root,
1261 struct state *state) {
1262 struct tree *old_ctx = state->ctx;
1266 list_for_each(step, lp->steps)
1268 if (ALLOC_N(*ns, *maxns+1) < 0) {
1269 STATE_ERROR(state, PATHX_ENOMEM);
1272 for (int i=0; i <= *maxns; i++) {
1273 (*ns)[i] = make_nodeset(state);
1274 if (HAS_ERROR(state))
1279 struct step *first_step = NULL;
1281 first_step = lp->steps;
1283 struct tree *root_tree;
1284 root_tree = step_root(first_step, state->ctx, state->root_ctx);
1285 ns_add((*ns)[0], root_tree, state);
1286 ns_clear_added((*ns)[0]);
1288 for (int i=0; i < root->used; i++)
1289 ns_add((*ns)[0], root->nodes[i], state);
1290 ns_clear_added((*ns)[0]);
1293 if (HAS_ERROR(state))
1297 list_for_each(step, lp->steps) {
1298 struct nodeset *work = (*ns)[cur_ns];
1299 struct nodeset *next = (*ns)[cur_ns + 1];
1300 if (position_pred(step->predicates)) {
1301 struct tree *node = position_filter(work, step, state);
1303 ns_add(next, node, state);
1304 ns_clear_added(next);
1307 for (int i=0; i < work->used; i++) {
1308 for (struct tree *node = step_first(step, work->nodes[i]);
1310 node = step_next(step, work->nodes[i], node)) {
1311 ns_add(next, node, state);
1314 ns_clear_added(next);
1315 ns_filter(next, step->predicates, state);
1316 if (HAS_ERROR(state))
1322 state->ctx = old_ctx;
1326 for (int i=0; i <= *maxns; i++)
1327 free_nodeset((*ns)[i]);
1330 state->ctx = old_ctx;
1334 static void eval_filter(struct expr *expr, struct state *state) {
1335 struct locpath *lp = expr->locpath;
1336 struct nodeset **ns = NULL;
1337 struct locpath_trace *lpt = state->locpath_trace;
1340 state->locpath_trace = NULL;
1341 if (expr->primary == NULL) {
1342 ns_from_locpath(lp, &maxns, &ns, NULL, state);
1344 eval_expr(expr->primary, state);
1346 value_ind_t primary_ind = pop_value_ind(state);
1347 struct value *primary = state->value_pool + primary_ind;
1348 assert(primary->tag == T_NODESET);
1349 ns_filter(primary->nodeset, expr->predicates, state);
1350 /* Evaluating predicates might have reallocated the value_pool */
1351 primary = state->value_pool + primary_ind;
1352 ns_from_locpath(lp, &maxns, &ns, primary->nodeset, state);
1356 value_ind_t vind = make_value(T_NODESET, state);
1358 state->value_pool[vind].nodeset = ns[maxns];
1359 push_value(vind, state);
1362 assert(lpt->ns == NULL);
1363 assert(lpt->lp == NULL);
1367 state->locpath_trace = lpt;
1369 for (int i=0; i < maxns; i++)
1370 free_nodeset(ns[i]);
1375 static struct value *lookup_var(const char *ident, struct state *state) {
1376 list_for_each(tab, state->symtab) {
1377 if (STREQ(ident, tab->name))
1383 static void eval_var(struct expr *expr, struct state *state) {
1384 struct value *v = lookup_var(expr->ident, state);
1385 value_ind_t vind = clone_value(v, state);
1387 push_value(vind, state);
1390 static void eval_expr(struct expr *expr, struct state *state) {
1392 switch (expr->tag) {
1394 eval_filter(expr, state);
1397 eval_binary(expr, state);
1400 push_value(expr->value_ind, state);
1403 eval_var(expr, state);
1406 eval_app(expr, state);
1413 /*************************************************************************
1415 *************************************************************************/
1417 static void check_expr(struct expr *expr, struct state *state);
1419 /* Typecheck a list of predicates. A predicate is a function of
1420 * one of the following types:
1422 * T_NODESET -> T_BOOLEAN
1423 * T_NUMBER -> T_BOOLEAN (position test)
1424 * T_BOOLEAN -> T_BOOLEAN
1426 static void check_preds(struct pred *pred, struct state *state) {
1429 for (int i=0; i < pred->nexpr; i++) {
1430 struct expr *e = pred->exprs[i];
1431 check_expr(e, state);
1433 if (e->type != T_NODESET && e->type != T_NUMBER &&
1434 e->type != T_BOOLEAN) {
1435 STATE_ERROR(state, PATHX_ETYPE);
1441 static void check_filter(struct expr *expr, struct state *state) {
1442 assert(expr->tag == E_FILTER);
1443 struct locpath *locpath = expr->locpath;
1445 if (expr->primary != NULL) {
1446 check_expr(expr->primary, state);
1447 if (expr->primary->type != T_NODESET) {
1448 STATE_ERROR(state, PATHX_ETYPE);
1451 check_preds(expr->predicates, state);
1454 list_for_each(s, locpath->steps) {
1455 check_preds(s->predicates, state);
1458 expr->type = T_NODESET;
1461 static void check_app(struct expr *expr, struct state *state) {
1462 assert(expr->tag == E_APP);
1464 for (int i=0; i < expr->func->arity; i++) {
1465 check_expr(expr->args[i], state);
1470 for (f=0; f < ARRAY_CARDINALITY(builtin_funcs); f++) {
1471 const struct func *fn = builtin_funcs + f;
1472 if (STRNEQ(expr->func->name, fn->name))
1474 if (expr->func->arity != fn->arity)
1478 for (int i=0; i < expr->func->arity; i++) {
1479 if (expr->args[i]->type != fn->arg_types[i]) {
1488 if (f < ARRAY_CARDINALITY(builtin_funcs)) {
1489 expr->func = builtin_funcs + f;
1490 expr->type = expr->func->type;
1492 STATE_ERROR(state, PATHX_ETYPE);
1496 /* Check the binary operators. Type rules:
1498 * '=', '!=' : T_NODESET -> T_NODESET -> T_BOOLEAN
1499 * T_STRING -> T_NODESET -> T_BOOLEAN
1500 * T_NODESET -> T_STRING -> T_BOOLEAN
1501 * T_NUMBER -> T_NUMBER -> T_BOOLEAN
1504 * '<', '<=' : T_NUMBER -> T_NUMBER -> T_BOOLEAN
1505 * T_STRING -> T_STRING -> T_BOOLEAN
1506 * '+' : T_NUMBER -> T_NUMBER -> T_NUMBER
1507 * T_STRING -> T_STRING -> T_STRING
1508 * T_REGEXP -> T_REGEXP -> T_REGEXP
1509 * '+', '-', '*': T_NUMBER -> T_NUMBER -> T_NUMBER
1511 * 'and', 'or': T_BOOLEAN -> T_BOOLEAN -> T_BOOLEAN
1512 * '=~', '!~' : T_STRING -> T_REGEXP -> T_BOOLEAN
1513 * T_NODESET -> T_REGEXP -> T_BOOLEAN
1515 * '|' : T_NODESET -> T_NODESET -> T_NODESET
1517 * Any type can be coerced to T_BOOLEAN (see coerce_to_bool)
1519 static void check_binary(struct expr *expr, struct state *state) {
1520 check_expr(expr->left, state);
1521 check_expr(expr->right, state);
1524 enum type l = expr->left->type;
1525 enum type r = expr->right->type;
1532 ok = ((l == T_NODESET || l == T_STRING)
1533 && (r == T_NODESET || r == T_STRING))
1534 || (l == T_NUMBER && r == T_NUMBER);;
1541 ok = (l == T_NUMBER && r == T_NUMBER)
1542 || (l == T_STRING && r == T_STRING);
1546 ok = (l == r && (l == T_NUMBER || l == T_STRING || l == T_REGEXP));
1551 ok = (l == T_NUMBER && r == T_NUMBER);
1555 ok = (l == T_NODESET && r == T_NODESET);
1565 ok = ((l == T_STRING || l == T_NODESET) && r == T_REGEXP);
1572 STATE_ERROR(state, PATHX_ETYPE);
1578 static void check_var(struct expr *expr, struct state *state) {
1579 struct value *v = lookup_var(expr->ident, state);
1581 STATE_ERROR(state, PATHX_ENOVAR);
1584 expr->type = v->tag;
1587 /* Typecheck an expression */
1588 static void check_expr(struct expr *expr, struct state *state) {
1592 check_filter(expr, state);
1595 check_binary(expr, state);
1598 expr->type = expr_value(expr, state)->tag;
1601 check_var(expr, state);
1604 check_app(expr, state);
1612 * Utility functions for the parser
1615 static void skipws(struct state *state) {
1616 while (isspace(*state->pos)) state->pos += 1;
1619 static int match(struct state *state, char m) {
1622 if (*state->pos == '\0')
1624 if (*state->pos == m) {
1631 static int peek(struct state *state, const char *chars) {
1632 return strchr(chars, *state->pos) != NULL;
1635 /* Return 1 if STATE->POS starts with TOKEN, followed by optional
1636 * whitespace, followed by FOLLOW. In that case, STATE->POS is set to the
1637 * first character after FOLLOW. Return 0 otherwise and leave STATE->POS
1640 static int looking_at(struct state *state, const char *token,
1641 const char *follow) {
1642 if (STREQLEN(state->pos, token, strlen(token))) {
1643 const char *p = state->pos + strlen(token);
1644 while (isspace(*p)) p++;
1645 if (STREQLEN(p, follow, strlen(follow))) {
1646 state->pos = p + strlen(follow);
1653 /*************************************************************************
1655 *************************************************************************/
1657 static void parse_expr(struct state *state);
1659 static struct expr* pop_expr(struct state *state) {
1660 if (state->exprs_used > 0) {
1661 state->exprs_used -= 1;
1662 return state->exprs[state->exprs_used];
1664 STATE_ERROR(state, PATHX_EINTERNAL);
1670 static void push_expr(struct expr *expr, struct state *state) {
1671 if (state->exprs_used >= state->exprs_size) {
1672 size_t new_size = 2*state->exprs_size;
1673 if (new_size == 0) new_size = 8;
1674 if (REALLOC_N(state->exprs, new_size) < 0) {
1678 state->exprs_size = new_size;
1680 state->exprs[state->exprs_used++] = expr;
1683 static void push_new_binary_op(enum binary_op op, struct state *state) {
1684 struct expr *expr = NULL;
1685 if (ALLOC(expr) < 0) {
1690 expr->tag = E_BINARY;
1692 expr->right = pop_expr(state);
1693 expr->left = pop_expr(state);
1694 push_expr(expr, state);
1697 int pathx_escape_name(const char *in, char **out) {
1699 int num_to_escape = 0;
1704 for (p = in; *p; p++) {
1705 if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1709 if (num_to_escape == 0)
1712 if (ALLOC_N(*out, strlen(in) + num_to_escape + 1) < 0)
1715 for (p = in, s = *out; *p; p++) {
1716 if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1725 * NameNoWS ::= [^][|/\= \t\n] | \\.
1726 * NameWS ::= [^][|/\=] | \\.
1727 * Name ::= NameNoWS NameWS* NameNoWS | NameNoWS
1729 static char *parse_name(struct state *state) {
1730 const char *s = state->pos;
1733 while (*state->pos != '\0' && strchr(name_follow, *state->pos) == NULL) {
1734 /* This is a hack: since we allow spaces in names, we need to avoid
1735 * gobbling up stuff that is in follow(Name), e.g. 'or' so that
1736 * things like [name1 or name2] still work.
1738 if (STREQLEN(state->pos, " or ", strlen(" or ")) ||
1739 STREQLEN(state->pos, " and ", strlen(" and ")))
1742 if (*state->pos == '\\') {
1744 if (*state->pos == '\0') {
1745 STATE_ERROR(state, PATHX_ENAME);
1752 /* Strip trailing white space */
1753 if (state->pos > s) {
1755 while (isspace(*state->pos) && state->pos >= s)
1760 if (state->pos == s) {
1761 STATE_ERROR(state, PATHX_ENAME);
1765 result = strndup(s, state->pos - s);
1766 if (result == NULL) {
1772 for (char *t = result; *t != '\0'; t++, p++) {
1783 * Predicate ::= "[" Expr "]" *
1785 static struct pred *parse_predicates(struct state *state) {
1786 struct pred *pred = NULL;
1789 while (match(state, L_BRACK)) {
1794 if (! match(state, R_BRACK)) {
1795 STATE_ERROR(state, PATHX_EPRED);
1804 if (ALLOC(pred) < 0) {
1808 pred->nexpr = nexpr;
1810 if (ALLOC_N(pred->exprs, nexpr) < 0) {
1816 for (int i = nexpr - 1; i >= 0; i--)
1817 pred->exprs[i] = pop_expr(state);
1823 * Step ::= AxisSpecifier NameTest Predicate* | '.' | '..'
1824 * AxisSpecifier ::= AxisName '::' | <epsilon>
1825 * AxisName ::= 'ancestor'
1826 * | 'ancestor-or-self'
1829 * | 'descendant-or-self'
1834 static struct step *parse_step(struct state *state) {
1836 int explicit_axis = 0, allow_predicates = 1;
1838 if (ALLOC(step) < 0) {
1844 for (int i = 0; i < ARRAY_CARDINALITY(axis_names); i++) {
1845 if (looking_at(state, axis_names[i], "::")) {
1852 if (! match(state, '*')) {
1853 step->name = parse_name(state);
1854 if (HAS_ERROR(state))
1856 if (! explicit_axis) {
1857 if (STREQ(step->name, ".") || STREQ(step->name, "..")) {
1858 step->axis = STREQ(step->name, ".") ? SELF : PARENT;
1860 allow_predicates = 0;
1865 if (allow_predicates) {
1866 step->predicates = parse_predicates(state);
1867 if (HAS_ERROR(state))
1878 static struct step *make_step(enum axis axis, struct state *state) {
1879 struct step *result = NULL;
1881 if (ALLOC(result) < 0) {
1885 result->axis = axis;
1890 * RelativeLocationPath ::= Step
1891 * | RelativeLocationPath '/' Step
1892 * | AbbreviatedRelativeLocationPath
1893 * AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step
1895 * The above is the same as
1896 * RelativeLocationPath ::= Step ('/' Step | '//' Step)*
1898 static struct locpath *
1899 parse_relative_location_path(struct state *state) {
1900 struct step *step = NULL;
1901 struct locpath *locpath = NULL;
1903 step = parse_step(state);
1904 if (HAS_ERROR(state))
1907 if (ALLOC(locpath) < 0) {
1911 list_append(locpath->steps, step);
1914 while (match(state, '/')) {
1915 if (*state->pos == '/') {
1917 step = make_step(DESCENDANT_OR_SELF, state);
1918 ENOMEM_ON_NULL(state, step);
1919 list_append(locpath->steps, step);
1921 step = parse_step(state);
1922 if (HAS_ERROR(state))
1924 list_append(locpath->steps, step);
1931 free_locpath(locpath);
1936 * LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
1937 * AbsoluteLocationPath ::= '/' RelativeLocationPath?
1938 * | AbbreviatedAbsoluteLocationPath
1939 * AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath
1942 static void parse_location_path(struct state *state) {
1943 struct expr *expr = NULL;
1944 struct locpath *locpath = NULL;
1946 if (match(state, '/')) {
1947 if (*state->pos == '/') {
1949 locpath = parse_relative_location_path(state);
1950 if (HAS_ERROR(state))
1952 struct step *step = make_step(DESCENDANT_OR_SELF, state);
1953 if (HAS_ERROR(state))
1955 list_cons(locpath->steps, step);
1957 if (*state->pos != '\0') {
1958 locpath = parse_relative_location_path(state);
1960 if (ALLOC(locpath) < 0)
1963 struct step *step = make_step(ROOT, state);
1964 if (HAS_ERROR(state))
1966 list_cons(locpath->steps, step);
1969 locpath = parse_relative_location_path(state);
1972 if (ALLOC(expr) < 0)
1974 expr->tag = E_FILTER;
1975 expr->locpath = locpath;
1976 push_expr(expr, state);
1983 free_locpath(locpath);
1988 * Number ::= /[0-9]+/
1990 static void parse_number(struct state *state) {
1991 struct expr *expr = NULL;
1996 val = strtoul(state->pos, &end, 10);
1997 if (errno || end == state->pos || (int) val != val) {
1998 STATE_ERROR(state, PATHX_ENUMBER);
2004 if (ALLOC(expr) < 0)
2006 expr->tag = E_VALUE;
2007 expr->value_ind = make_value(T_NUMBER, state);
2008 if (HAS_ERROR(state))
2010 expr_value(expr, state)->number = val;
2012 push_expr(expr, state);
2023 * Literal ::= '"' /[^"]* / '"' | "'" /[^']* / "'"
2025 static void parse_literal(struct state *state) {
2028 struct expr *expr = NULL;
2030 if (*state->pos == '"')
2032 else if (*state->pos == '\'')
2035 STATE_ERROR(state, PATHX_ESTRING);
2041 while (*state->pos != '\0' && *state->pos != delim) state->pos += 1;
2043 if (*state->pos != delim) {
2044 STATE_ERROR(state, PATHX_EDELIM);
2049 if (ALLOC(expr) < 0)
2051 expr->tag = E_VALUE;
2052 expr->value_ind = make_value(T_STRING, state);
2053 if (HAS_ERROR(state))
2055 expr_value(expr, state)->string = strndup(s, state->pos - s - 1);
2056 if (expr_value(expr, state)->string == NULL)
2059 push_expr(expr, state);
2070 * FunctionCall ::= Name '(' ( Expr ( ',' Expr )* )? ')'
2072 static void parse_function_call(struct state *state) {
2073 const struct func *func = NULL;
2074 struct expr *expr = NULL;
2075 int nargs = 0, find = 0;
2077 for (; find < ARRAY_CARDINALITY(builtin_funcs); find++) {
2078 if (looking_at(state, builtin_funcs[find].name, "(")) {
2079 func = builtin_funcs + find;
2084 STATE_ERROR(state, PATHX_ENAME);
2088 if (! match(state, ')')) {
2093 } while (match(state, ','));
2095 if (! match(state, ')')) {
2096 STATE_ERROR(state, PATHX_EPAREN);
2101 int found = 0; /* Whether there is a builtin matching in name and arity */
2102 for (int i=find; i < ARRAY_CARDINALITY(builtin_funcs); i++) {
2103 if (STRNEQ(func->name, builtin_funcs[i].name))
2105 if (builtin_funcs[i].arity == nargs) {
2106 func = builtin_funcs + i;
2113 STATE_ERROR(state, PATHX_EARITY);
2117 if (ALLOC(expr) < 0) {
2122 if (ALLOC_N(expr->args, nargs) < 0) {
2128 for (int i = nargs - 1; i >= 0; i--)
2129 expr->args[i] = pop_expr(state);
2131 push_expr(expr, state);
2135 * VariableReference ::= '$' /[a-zA-Z_][a-zA-Z0-9_]* /
2137 * The '$' is consumed by parse_primary_expr
2139 static void parse_var(struct state *state) {
2140 const char *id = state->pos;
2141 struct expr *expr = NULL;
2143 if (!isalpha(*id) && *id != '_') {
2144 STATE_ERROR(state, PATHX_ENAME);
2148 while (isalpha(*id) || isdigit(*id) || *id == '_')
2151 if (ALLOC(expr) < 0)
2154 expr->ident = strndup(state->pos, id - state->pos);
2155 if (expr->ident == NULL)
2158 push_expr(expr, state);
2168 * PrimaryExpr ::= Literal
2171 * | VariableReference
2175 static void parse_primary_expr(struct state *state) {
2176 if (peek(state, "'\"")) {
2177 parse_literal(state);
2178 } else if (peek(state, "0123456789")) {
2179 parse_number(state);
2180 } else if (match(state, '(')) {
2183 if (! match(state, ')')) {
2184 STATE_ERROR(state, PATHX_EPAREN);
2187 } else if (match(state, '$')) {
2190 parse_function_call(state);
2194 static int looking_at_primary_expr(struct state *state) {
2195 const char *s = state->pos;
2196 /* Is it a Number, Literal or VariableReference ? */
2197 if (peek(state, "$'\"0123456789"))
2200 /* Or maybe a function call, i.e. a word followed by a '(' ?
2201 * Note that our function names are only [a-zA-Z]+
2203 while (*s != '\0' && isalpha(*s)) s++;
2204 while (*s != '\0' && isspace(*s)) s++;
2209 * PathExpr ::= LocationPath
2211 * | FilterExpr '/' RelativeLocationPath
2212 * | FilterExpr '//' RelativeLocationPath
2214 * FilterExpr ::= PrimaryExpr Predicate
2216 * The grammar is ambiguous here: the expression '42' can either be the
2217 * number 42 (a PrimaryExpr) or the RelativeLocationPath 'child::42'. The
2218 * reason for this ambiguity is that we allow node names like '42' in the
2219 * tree; rather than forbid them, we resolve the ambiguity by always
2220 * parsing '42' as a number, and requiring that the user write the
2221 * RelativeLocationPath in a different form, e.g. 'child::42' or './42'.
2223 static void parse_path_expr(struct state *state) {
2224 struct expr *expr = NULL;
2225 struct pred *predicates = NULL;
2226 struct locpath *locpath = NULL;
2228 if (looking_at_primary_expr(state)) {
2229 parse_primary_expr(state);
2231 predicates = parse_predicates(state);
2233 if (match(state, '/')) {
2234 if (match(state, '/')) {
2235 locpath = parse_relative_location_path(state);
2236 if (HAS_ERROR(state))
2239 struct step *step = make_step(DESCENDANT_OR_SELF, state);
2240 if (HAS_ERROR(state))
2242 list_cons(locpath->steps, step);
2244 if (*state->pos == '\0') {
2245 STATE_ERROR(state, PATHX_EEND);
2248 locpath = parse_relative_location_path(state);
2251 /* A PathExpr without predicates and locpath is
2252 * just a PrimaryExpr
2254 if (predicates == NULL && locpath == NULL)
2256 /* To make evaluation easier, we parse something like
2257 * $var[pred] as $var[pred]/.
2259 if (locpath == NULL) {
2260 if (ALLOC(locpath) < 0)
2262 if (ALLOC(locpath->steps) < 0)
2264 locpath->steps->axis = SELF;
2266 if (ALLOC(expr) < 0)
2268 expr->tag = E_FILTER;
2269 expr->predicates = predicates;
2270 expr->primary = pop_expr(state);
2271 expr->locpath = locpath;
2272 push_expr(expr, state);
2274 parse_location_path(state);
2279 free_pred(predicates);
2280 free_locpath(locpath);
2285 * UnionExpr ::= PathExpr ('|' PathExpr)*
2287 static void parse_union_expr(struct state *state) {
2288 parse_path_expr(state);
2290 while (match(state, '|')) {
2291 parse_path_expr(state);
2293 push_new_binary_op(OP_UNION, state);
2298 * MultiplicativeExpr ::= UnionExpr ('*' UnionExpr)*
2300 static void parse_multiplicative_expr(struct state *state) {
2301 parse_union_expr(state);
2303 while (match(state, '*')) {
2304 parse_union_expr(state);
2306 push_new_binary_op(OP_STAR, state);
2311 * AdditiveExpr ::= MultiplicativeExpr (AdditiveOp MultiplicativeExpr)*
2312 * AdditiveOp ::= '+' | '-'
2314 static void parse_additive_expr(struct state *state) {
2315 parse_multiplicative_expr(state);
2317 while (*state->pos == '+' || *state->pos == '-') {
2318 enum binary_op op = (*state->pos == '+') ? OP_PLUS : OP_MINUS;
2321 parse_multiplicative_expr(state);
2323 push_new_binary_op(op, state);
2328 * RelationalExpr ::= AdditiveExpr (RelationalOp AdditiveExpr)?
2329 * RelationalOp ::= ">" | "<" | ">=" | "<="
2331 static void parse_relational_expr(struct state *state) {
2332 parse_additive_expr(state);
2334 if (*state->pos == '<' || *state->pos == '>') {
2335 enum binary_op op = (*state->pos == '<') ? OP_LT : OP_GT;
2337 if (*state->pos == '=') {
2338 op = (op == OP_LT) ? OP_LE : OP_GE;
2342 parse_additive_expr(state);
2344 push_new_binary_op(op, state);
2349 * EqualityExpr ::= RelationalExpr (EqualityOp RelationalExpr)? | ReMatchExpr
2350 * EqualityOp ::= "=" | "!="
2351 * ReMatchExpr ::= RelationalExpr MatchOp RelationalExpr
2352 * MatchOp ::= "=~" | "!~"
2354 static void parse_equality_expr(struct state *state) {
2355 parse_relational_expr(state);
2357 if ((*state->pos == '=' || *state->pos == '!') && state->pos[1] == '~') {
2358 enum binary_op op = (*state->pos == '=') ? OP_RE_MATCH : OP_RE_NOMATCH;
2361 parse_relational_expr(state);
2363 push_new_binary_op(op, state);
2364 } else if (*state->pos == '=' ||
2365 (*state->pos == '!' && state->pos[1] == '=')) {
2366 enum binary_op op = (*state->pos == '=') ? OP_EQ : OP_NEQ;
2367 state->pos += (op == OP_EQ) ? 1 : 2;
2369 parse_relational_expr(state);
2371 push_new_binary_op(op, state);
2376 * AndExpr ::= EqualityExpr ('and' EqualityExpr)*
2378 static void parse_and_expr(struct state *state) {
2379 parse_equality_expr(state);
2381 while (*state->pos == 'a' && state->pos[1] == 'n'
2382 && state->pos[2] == 'd') {
2385 parse_equality_expr(state);
2387 push_new_binary_op(OP_AND, state);
2392 * OrExpr ::= AndExpr ('or' AndExpr)*
2394 static void parse_or_expr(struct state *state) {
2395 parse_and_expr(state);
2397 while (*state->pos == 'o' && state->pos[1] == 'r') {
2400 parse_and_expr(state);
2402 push_new_binary_op(OP_OR, state);
2409 static void parse_expr(struct state *state) {
2411 parse_or_expr(state);
2414 static void store_error(struct pathx *pathx) {
2415 const char *pathx_msg = NULL;
2416 const char *path = pathx->state->txt;
2417 const pathx_errcode_t errcode = pathx->state->errcode;
2418 struct error *err = pathx->state->error;
2420 char *pos_str = pathx->state->errmsg;
2421 pathx->state->errmsg = NULL;
2423 if (err == NULL || errcode == PATHX_NOERROR || err->code != AUG_NOERROR)
2428 err->code = AUG_ENOMEM;
2431 err->code = AUG_EMMATCH;
2433 case PATHX_ENOMATCH:
2434 err->code = AUG_ENOMATCH;
2437 err->code = AUG_EPATHX;
2441 /* We only need details for pathx syntax errors */
2442 if (err->code != AUG_EPATHX)
2446 pathx_msg = pathx_error(pathx, NULL, &pos);
2448 bool has_msg = pos_str != NULL;
2449 int pos_str_len = pos_str == NULL ? 0 : strlen(pos_str);
2450 if (REALLOC_N(pos_str, pos_str_len + strlen(path) + 8) >= 0) {
2452 strcat(pos_str, " in ");
2453 strncat(pos_str, path, pos);
2455 /* initialize pos_str explicitly, path might be "" */
2457 strncat(pos_str, path, pos);
2459 strcat(pos_str, "|=|");
2460 strcat(pos_str, path + pos);
2463 err->minor = errcode;
2464 err->details = pos_str;
2466 err->minor_details = pathx_msg;
2469 int pathx_parse(const struct tree *tree,
2473 struct pathx_symtab *symtab,
2474 struct tree *root_ctx,
2475 struct pathx **pathx) {
2476 struct state *state = NULL;
2480 if (ALLOC(*pathx) < 0)
2483 (*pathx)->origin = (struct tree *) tree;
2486 if (ALLOC((*pathx)->state) < 0)
2488 state = (*pathx)->state;
2490 state->errcode = PATHX_NOERROR;
2491 state->errmsg = NULL;
2494 state->symtab = symtab;
2495 state->root_ctx = root_ctx;
2498 if (ALLOC_N(state->value_pool, 8) < 0) {
2502 state->value_pool_size = 8;
2503 state->value_pool[0].tag = T_BOOLEAN;
2504 state->value_pool[0].boolval = 0;
2505 state->value_pool[1].tag = T_BOOLEAN;
2506 state->value_pool[1].boolval = 1;
2507 state->value_pool_used = 2;
2511 if (HAS_ERROR(state))
2513 if (state->pos != state->txt + strlen(state->txt)) {
2514 STATE_ERROR(state, PATHX_EEND);
2518 if (state->exprs_used != 1) {
2519 STATE_ERROR(state, PATHX_EINTERNAL);
2524 check_expr(state->exprs[0], state);
2525 if (HAS_ERROR(state))
2528 if (need_nodeset && state->exprs[0]->type != T_NODESET) {
2529 STATE_ERROR(state, PATHX_ETYPE);
2534 store_error(*pathx);
2535 return state->errcode;
2540 err->code = AUG_ENOMEM;
2541 return PATHX_ENOMEM;
2544 /*************************************************************************
2545 * Searching in the tree
2546 *************************************************************************/
2548 static bool step_matches(struct step *step, struct tree *tree) {
2549 if (step->name == NULL) {
2550 return step->axis == ROOT || tree->label != NULL;
2552 return streqx(step->name, tree->label);
2556 static struct tree *tree_prev(struct tree *pos) {
2557 struct tree *node = NULL;
2558 if (pos != pos->parent->children) {
2559 for (node = pos->parent->children;
2566 /* When the first step doesn't begin with ROOT then use relative root context
2568 static struct tree *step_root(struct step *step, struct tree *ctx,
2569 struct tree *root_ctx) {
2570 struct tree *node = NULL;
2571 switch (step->axis) {
2577 case PRECEDING_SIBLING:
2578 case FOLLOWING_SIBLING:
2579 /* only use root_ctx when ctx is the absolute tree root */
2580 if (ctx == ctx->parent && root_ctx != NULL)
2586 case DESCENDANT_OR_SELF:
2597 static struct tree *step_first(struct step *step, struct tree *ctx) {
2598 struct tree *node = NULL;
2599 switch (step->axis) {
2601 case DESCENDANT_OR_SELF:
2606 node = ctx->children;
2613 while (ctx->parent != ctx)
2617 case PRECEDING_SIBLING:
2618 node = tree_prev(ctx);
2620 case FOLLOWING_SIBLING:
2628 if (step_matches(step, node))
2630 return step_next(step, ctx, node);
2633 static struct tree *step_next(struct step *step, struct tree *ctx,
2634 struct tree *node) {
2635 while (node != NULL) {
2636 switch (step->axis) {
2644 case DESCENDANT_OR_SELF:
2645 if (node->children != NULL) {
2646 node = node->children;
2648 while (node->next == NULL && node != ctx)
2649 node = node->parent;
2661 if (node->parent == node)
2664 node = node->parent;
2666 case PRECEDING_SIBLING:
2667 node = tree_prev(node);
2669 case FOLLOWING_SIBLING:
2675 if (node != NULL && step_matches(step, node))
2681 static struct value *pathx_eval(struct pathx *pathx) {
2682 struct state *state = pathx->state;
2683 state->ctx = pathx->origin;
2686 eval_expr(state->exprs[0], state);
2687 if (HAS_ERROR(state))
2690 if (state->values_used != 1) {
2691 STATE_ERROR(state, PATHX_EINTERNAL);
2694 return pop_value(state);
2697 struct tree *pathx_next(struct pathx *pathx) {
2698 if (pathx->node + 1 < pathx->nodeset->used)
2699 return pathx->nodeset->nodes[++pathx->node];
2703 /* Find the first node in TREE matching PATH. */
2704 struct tree *pathx_first(struct pathx *pathx) {
2705 if (pathx->nodeset == NULL) {
2706 struct value *v = pathx_eval(pathx);
2708 if (HAS_ERROR(pathx->state))
2710 assert(v->tag == T_NODESET);
2711 pathx->nodeset = v->nodeset;
2714 if (pathx->nodeset->used == 0)
2717 return pathx->nodeset->nodes[0];
2723 /* Find a node in the tree that matches the longest prefix of PATH.
2725 * Return 1 if a node was found that exactly matches PATH, 0 if an incomplete
2726 * prefix matches, and -1 if more than one node in the tree match.
2728 * TMATCH is set to the tree node that matches, and SMATCH to the next step
2729 * after the one where TMATCH matched. If no node matches or multiple nodes
2730 * at the same depth match, TMATCH and SMATCH will be NULL. When exactly
2731 * one node matches, TMATCH will be that node, and SMATCH will be NULL.
2733 static int locpath_search(struct locpath_trace *lpt,
2734 struct tree **tmatch, struct step **smatch) {
2738 for (last=lpt->maxns; last >= 0 && lpt->ns[last]->used == 0; last--);
2740 *smatch = lpt->lp->steps;
2744 if (lpt->ns[last]->used > 1) {
2749 *tmatch = lpt->ns[last]->nodes[0];
2750 *smatch = lpt->lp->steps;
2751 for (int i=0; i < last; i++)
2752 *smatch = (*smatch)->next;
2754 for (int i=0; i < lpt->maxns; i++)
2755 free_nodeset(lpt->ns[i]);
2760 /* Expand the tree ROOT so that it contains all components of PATH. PATH
2761 * must have been initialized against ROOT by a call to PATH_FIND_ONE.
2763 * Return the first segment that was created by this operation, or NULL on
2766 int pathx_expand_tree(struct pathx *path, struct tree **tree) {
2768 struct step *step = NULL;
2769 struct locpath_trace lpt;
2770 struct tree *first_child = NULL;
2771 struct value *v = NULL;
2774 path->state->locpath_trace = &lpt;
2775 v = pathx_eval(path);
2776 path->state->locpath_trace = NULL;
2777 if (HAS_ERROR(path->state))
2780 if (lpt.maxns == 0) {
2781 if (v->tag != T_NODESET || v->nodeset->used == 0) {
2782 STATE_ERROR(path->state, PATHX_ENOMATCH);
2785 if (v->nodeset->used > 1)
2787 *tree = v->nodeset->nodes[0];
2791 *tree = path->origin;
2792 r = locpath_search(&lpt, tree, &step);
2794 STATE_ERROR(path->state, PATHX_EMMATCH);
2801 struct tree *parent = *tree;
2803 parent = path->origin;
2805 list_for_each(s, step) {
2806 if (s->name == NULL || s->axis != CHILD)
2808 struct tree *t = make_tree(strdup(s->name), NULL, parent, NULL);
2809 if (first_child == NULL)
2811 if (t == NULL || t->label == NULL)
2813 list_append(parent->children, t);
2817 while (first_child->children != NULL)
2818 first_child = first_child->children;
2820 *tree = first_child;
2824 if (first_child != NULL) {
2825 list_remove(first_child, first_child->parent->children);
2826 free_tree(first_child);
2833 int pathx_find_one(struct pathx *path, struct tree **tree) {
2834 *tree = pathx_first(path);
2835 if (HAS_ERROR(path->state))
2837 return path->nodeset->used;
2840 struct error *err_of_pathx(struct pathx *px) {
2841 return px->state->error;
2844 const char *pathx_error(struct pathx *path, const char **txt, int *pos) {
2845 int errcode = PATHX_ENOMEM;
2848 if (path->state->errcode < ARRAY_CARDINALITY(errcodes))
2849 errcode = path->state->errcode;
2851 errcode = PATHX_EINTERNAL;
2854 *txt = path->state->txt;
2857 *pos = path->state->pos - path->state->txt;
2859 return errcodes[errcode];
2865 static struct pathx_symtab
2866 *make_symtab(struct pathx_symtab *symtab, const char *name,
2867 struct value *value)
2869 struct pathx_symtab *new;
2876 if (ALLOC(new) < 0) {
2882 if (symtab == NULL) {
2885 new->next = symtab->next;
2891 void free_symtab(struct pathx_symtab *symtab) {
2893 while (symtab != NULL) {
2894 struct pathx_symtab *del = symtab;
2897 release_value(del->value);
2903 struct pathx_symtab *pathx_get_symtab(struct pathx *pathx) {
2904 return pathx->state->symtab;
2907 static int pathx_symtab_set(struct pathx_symtab **symtab,
2908 const char *name, struct value *v) {
2911 list_for_each(tab, *symtab) {
2912 if (STREQ(tab->name, name)) {
2913 release_value(tab->value);
2922 struct pathx_symtab *new = NULL;
2924 new = make_symtab(*symtab, name, v);
2934 int pathx_symtab_define(struct pathx_symtab **symtab,
2935 const char *name, struct pathx *px) {
2937 struct value *value = NULL, *v = NULL;
2938 struct state *state = px->state;
2940 value = pathx_eval(px);
2941 if (HAS_ERROR(px->state))
2950 value->tag = T_BOOLEAN;
2952 r = pathx_symtab_set(symtab, name, v);
2958 if (v->tag == T_NODESET)
2959 return v->nodeset->used;
2963 release_value(value);
2971 int pathx_symtab_undefine(struct pathx_symtab **symtab, const char *name) {
2972 struct pathx_symtab *del = NULL;
2975 del != NULL && !STREQ(del->name, name);
2979 list_remove(del, *symtab);
2984 int pathx_symtab_assign_tree(struct pathx_symtab **symtab,
2985 const char *name, struct tree *tree) {
2986 struct value *v = NULL;
2993 if (ALLOC(v->nodeset) < 0)
2995 if (ALLOC_N(v->nodeset->nodes, 1) < 0)
2997 v->nodeset->used = 1;
2998 v->nodeset->size = 1;
2999 v->nodeset->nodes[0] = tree;
3001 r = pathx_symtab_set(symtab, name, v);
3011 void pathx_symtab_remove_descendants(struct pathx_symtab *symtab,
3012 const struct tree *tree) {
3013 list_for_each(tab, symtab) {
3014 if (tab->value->tag != T_NODESET)
3016 struct nodeset *ns = tab->value->nodeset;
3017 for (int i=0; i < ns->used;) {
3018 struct tree *t = ns->nodes[i];
3019 while (t != t->parent && t != tree)
3022 ns_remove(ns, i, 1);
3031 * indent-tabs-mode: nil