2 * pathx.c: handling path expressions
4 * Copyright (C) 2007-2016 David Lutterkort
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * Author: David Lutterkort <dlutter@redhat.com>
34 static const char *const errcodes[] = {
37 "illegal string literal",
39 "string missing ending ' or \"",
45 "internal error", /* PATHX_EINTERNAL */
46 "type error", /* PATHX_ETYPE */
47 "undefined variable", /* PATHX_ENOVAR */
48 "garbage at end of path expression", /* PATHX_EEND */
49 "no match for path expression", /* PATHX_ENOMATCH */
50 "wrong number of arguments in function call", /* PATHX_EARITY */
51 "invalid regular expression", /* PATHX_EREGEXP */
52 "too many matches", /* PATHX_EMMATCH */
53 "wrong flag for regexp" /* PATHX_EREGEXPFLAG */
57 * Path expressions are strings that use a notation modelled on XPath.
61 T_NONE = 0, /* Not a type */
89 OP_RE_MATCH, /* '=~' */
90 OP_RE_NOMATCH, /* '!~' */
111 /* This array is indexed by enum axis */
112 static const char *const axis_names[] = {
116 "descendant-or-self",
124 /* The characters that can follow a name in a location expression (aka path)
125 * The parser will assume that name (path component) is finished when it
126 * encounters any of these characters, unless they are escaped by preceding
129 * See parse_name for the gory details
131 static const char name_follow[] = "][|/=()!,";
133 /* Doubly linked list of location steps. Besides the information from the
134 * path expression, also contains information to iterate over a node set,
135 * in particular, the context node CTX for the step, and the current node
136 * CUR within that context.
141 char *name; /* NULL to match any name */
142 struct pred *predicates;
145 /* Initialise the root nodeset with the first step */
146 static struct tree *step_root(struct step *step, struct tree *ctx,
147 struct tree *root_ctx);
148 /* Iteration over the nodes on a step, ignoring the predicates */
149 static struct tree *step_first(struct step *step, struct tree *ctx);
150 static struct tree *step_next(struct step *step, struct tree *ctx,
153 struct pathx_symtab {
154 struct pathx_symtab *next;
161 struct nodeset *nodeset;
179 typedef uint32_t value_ind_t;
184 struct nodeset *nodeset; /* T_NODESET */
185 int64_t number; /* T_NUMBER */
186 char *string; /* T_STRING */
187 bool boolval; /* T_BOOLEAN */
188 struct regexp *regexp; /* T_REGEXP */
196 struct { /* E_FILTER */
197 struct expr *primary;
198 struct pred *predicates;
199 struct locpath *locpath;
201 struct { /* E_BINARY */
206 value_ind_t value_ind; /* E_VALUE */
207 char *ident; /* E_VAR */
209 const struct func *func;
211 /* If fold is true, replace this function invocation
212 * with its value after the first time we evaluate this
219 struct locpath_trace {
225 /* Internal state of the evaluator/parser */
227 pathx_errcode_t errcode;
232 const char *txt; /* Entire expression */
233 const char *pos; /* Current position within TXT during parsing */
235 struct tree *ctx; /* The current node */
239 struct tree *root_ctx; /* Root context for relative paths */
241 /* A table of all values. The table is dynamically reallocated, i.e.
242 * pointers to struct value should not be used across calls that
243 * might allocate new values
245 * value_pool[0] is always the boolean false, and value_pool[1]
246 * always the boolean true
248 struct value *value_pool;
249 value_ind_t value_pool_used;
250 value_ind_t value_pool_size;
251 /* Stack of values (as indices into value_pool), with bottom of
252 stack in values[0] */
256 /* Stack of expressions, with bottom of stack in exprs[0] */
260 /* Trace of a locpath evaluation, needed by pathx_expand_tree.
261 Generally NULL, unless a trace is needed.
263 struct locpath_trace *locpath_trace;
264 /* Symbol table for variable lookups */
265 struct pathx_symtab *symtab;
266 /* Error structure, used to communicate errors to struct augeas;
267 * we never own this structure, and therefore never free it */
271 /* We consider NULL and the empty string to be equal */
273 static inline int streqx(const char *s1, const char *s2) {
275 return (s2 == NULL || strlen(s2) == 0);
277 return strlen(s1) == 0;
278 return STREQ(s1, s2);
283 typedef void (*func_impl_t)(struct state *state, int nargs);
289 bool pure; /* Result only depends on args */
290 const enum type *arg_types;
294 static void func_last(struct state *state, int nargs);
295 static void func_position(struct state *state, int nargs);
296 static void func_count(struct state *state, int nargs);
297 static void func_label(struct state *state, int nargs);
298 static void func_regexp(struct state *state, int nargs);
299 static void func_regexp_flag(struct state *state, int nargs);
300 static void func_glob(struct state *state, int nargs);
301 static void func_int(struct state *state, int nargs);
302 static void func_not(struct state *state, int nargs);
304 static const enum type arg_types_nodeset[] = { T_NODESET };
305 static const enum type arg_types_string[] = { T_STRING };
306 static const enum type arg_types_bool[] = { T_BOOLEAN };
307 static const enum type arg_types_string_string[] = { T_STRING, T_STRING };
308 static const enum type arg_types_nodeset_string[] = { T_NODESET, T_STRING };
310 static const struct func builtin_funcs[] = {
311 { .name = "last", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
312 .impl = func_last, .pure = false },
313 { .name = "position", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
314 .impl = func_position, .pure = false },
315 { .name = "label", .arity = 0, .type = T_STRING, .arg_types = NULL,
316 .impl = func_label, .pure = false },
317 { .name = "count", .arity = 1, .type = T_NUMBER,
318 .arg_types = arg_types_nodeset,
319 .impl = func_count, .pure = false },
320 { .name = "regexp", .arity = 1, .type = T_REGEXP,
321 .arg_types = arg_types_string,
322 .impl = func_regexp, .pure = true },
323 { .name = "regexp", .arity = 1, .type = T_REGEXP,
324 .arg_types = arg_types_nodeset,
325 .impl = func_regexp, .pure = true },
326 { .name = "regexp", .arity = 2, .type = T_REGEXP,
327 .arg_types = arg_types_string_string,
328 .impl = func_regexp_flag, .pure = true },
329 { .name = "regexp", .arity = 2, .type = T_REGEXP,
330 .arg_types = arg_types_nodeset_string,
331 .impl = func_regexp_flag, .pure = true },
332 { .name = "glob", .arity = 1, .type = T_REGEXP,
333 .arg_types = arg_types_string,
334 .impl = func_glob, .pure = true },
335 { .name = "glob", .arity = 1, .type = T_REGEXP,
336 .arg_types = arg_types_nodeset,
337 .impl = func_glob, .pure = true },
338 { .name = "int", .arity = 1, .type = T_NUMBER,
339 .arg_types = arg_types_string, .impl = func_int, .pure = false },
340 { .name = "int", .arity = 1, .type = T_NUMBER,
341 .arg_types = arg_types_nodeset, .impl = func_int, .pure = false },
342 { .name = "int", .arity = 1, .type = T_NUMBER,
343 .arg_types = arg_types_bool, .impl = func_int, .pure = false },
344 { .name = "not", .arity = 1, .type = T_BOOLEAN,
345 .arg_types = arg_types_bool, .impl = func_not, .pure = true }
348 #define RET_ON_ERROR \
349 if (state->errcode != PATHX_NOERROR) return
351 #define RET0_ON_ERROR \
352 if (state->errcode != PATHX_NOERROR) return 0
354 #define STATE_ERROR(state, err) \
356 state->errcode = err; \
357 state->file = __FILE__; \
358 state->line = __LINE__; \
361 #define HAS_ERROR(state) (state->errcode != PATHX_NOERROR)
363 #define STATE_ENOMEM STATE_ERROR(state, PATHX_ENOMEM)
366 * Free the various data structures
369 static void free_expr(struct expr *expr);
371 static void free_pred(struct pred *pred) {
375 for (int i=0; i < pred->nexpr; i++) {
376 free_expr(pred->exprs[i]);
382 static void free_step(struct step *step) {
383 while (step != NULL) {
384 struct step *del = step;
387 free_pred(del->predicates);
392 static void free_locpath(struct locpath *locpath) {
395 while (locpath->steps != NULL) {
396 struct step *step = locpath->steps;
397 locpath->steps = step->next;
399 free_pred(step->predicates);
405 static void free_expr(struct expr *expr) {
410 free_expr(expr->primary);
411 free_pred(expr->predicates);
412 free_locpath(expr->locpath);
415 free_expr(expr->left);
416 free_expr(expr->right);
424 for (int i=0; i < expr->func->arity; i++)
425 free_expr(expr->args[i]);
434 static void free_nodeset(struct nodeset *ns) {
441 /* Free all objects used by VALUE, but not VALUE itself */
442 static void release_value(struct value *v) {
448 free_nodeset(v->nodeset);
457 unref(v->regexp, regexp);
464 static void free_state(struct state *state) {
468 for(int i=0; i < state->exprs_used; i++)
469 free_expr(state->exprs[i]);
472 for(int i=0; i < state->value_pool_used; i++)
473 release_value(state->value_pool + i);
474 free(state->value_pool);
479 void free_pathx(struct pathx *pathx) {
482 free_state(pathx->state);
489 static struct nodeset *make_nodeset(struct state *state) {
490 struct nodeset *result;
491 if (ALLOC(result) < 0)
496 /* Add NODE to NS if it is not in NS yet. This relies on the flag
497 * NODE->ADDED and care must be taken that NS_CLEAR_ADDED is called on NS
498 * as soon as we are done adding nodes to it.
500 static void ns_add(struct nodeset *ns, struct tree *node,
501 struct state *state) {
504 if (ns->used >= ns->size) {
505 size_t size = 2 * ns->size;
506 if (size < 10) size = 10;
507 if (REALLOC_N(ns->nodes, size) < 0)
511 ns->nodes[ns->used] = node;
516 static void ns_clear_added(struct nodeset *ns) {
517 for (int i=0; i < ns->used; i++)
518 ns->nodes[i]->added = 0;
521 static struct nodeset *
522 clone_nodeset(struct nodeset *ns, struct state *state)
524 struct nodeset *clone;
525 if (ALLOC(clone) < 0) {
529 if (ALLOC_N(clone->nodes, ns->used) < 0) {
534 clone->used = ns->used;
535 clone->size = ns->used;
536 for (int i=0; i < ns->used; i++)
537 clone->nodes[i] = ns->nodes[i];
544 static value_ind_t make_value(enum type tag, struct state *state) {
545 assert(tag != T_BOOLEAN);
547 if (state->value_pool_used >= state->value_pool_size) {
548 value_ind_t new_size = 2*state->value_pool_size;
549 if (new_size <= state->value_pool_size) {
553 if (REALLOC_N(state->value_pool, new_size) < 0) {
557 state->value_pool_size = new_size;
559 state->value_pool[state->value_pool_used].tag = tag;
560 state->value_pool[state->value_pool_used].nodeset = NULL;
561 return state->value_pool_used++;
564 static value_ind_t clone_value(struct value *v, struct state *state) {
565 value_ind_t vind = make_value(v->tag, state);
567 struct value *clone = state->value_pool + vind;
571 clone->nodeset = clone_nodeset(v->nodeset, state);
574 clone->number = v->number;
577 clone->string = strdup(v->string);
578 if (clone->string == NULL) {
583 clone->boolval = v->boolval;
586 clone->regexp = ref(v->regexp);
594 static value_ind_t pop_value_ind(struct state *state) {
595 if (state->values_used > 0) {
596 state->values_used -= 1;
597 return state->values[state->values_used];
599 STATE_ERROR(state, PATHX_EINTERNAL);
605 static struct value *pop_value(struct state *state) {
606 value_ind_t vind = pop_value_ind(state);
607 if (HAS_ERROR(state))
609 return state->value_pool + vind;
612 static void push_value(value_ind_t vind, struct state *state) {
613 if (state->values_used >= state->values_size) {
614 size_t new_size = 2*state->values_size;
615 if (new_size == 0) new_size = 8;
616 if (REALLOC_N(state->values, new_size) < 0) {
620 state->values_size = new_size;
622 state->values[state->values_used++] = vind;
625 static void push_boolean_value(int b, struct state *state) {
626 push_value(b != 0, state);
630 static struct value *expr_value(struct expr *expr, struct state *state) {
631 return state->value_pool + expr->value_ind;
634 /*************************************************************************
636 ************************************************************************/
637 static void eval_expr(struct expr *expr, struct state *state);
640 #define ensure_arity(min, max) \
641 if (nargs < min || nargs > max) { \
642 STATE_ERROR(state, PATHX_EINTERNAL); \
646 static void func_last(struct state *state, int nargs) {
648 value_ind_t vind = make_value(T_NUMBER, state);
651 state->value_pool[vind].number = state->ctx_len;
652 push_value(vind, state);
655 static void func_position(struct state *state, int nargs) {
657 value_ind_t vind = make_value(T_NUMBER, state);
660 state->value_pool[vind].number = state->ctx_pos;
661 push_value(vind, state);
664 static void func_count(struct state *state, int nargs) {
666 value_ind_t vind = make_value(T_NUMBER, state);
669 struct value *ns = pop_value(state);
670 state->value_pool[vind].number = ns->nodeset->used;
671 push_value(vind, state);
674 static void func_label(struct state *state, int nargs) {
676 value_ind_t vind = make_value(T_STRING, state);
681 if (state->ctx->label)
682 s = strdup(state->ctx->label);
689 state->value_pool[vind].string = s;
690 push_value(vind, state);
693 static void func_int(struct state *state, int nargs) {
695 value_ind_t vind = make_value(T_NUMBER, state);
699 struct value *v = pop_value(state);
700 if (v->tag == T_BOOLEAN) {
703 const char *s = NULL;
704 if (v->tag == T_STRING) {
708 if (v->nodeset->used != 1) {
709 STATE_ERROR(state, PATHX_EMMATCH);
712 s = v->nodeset->nodes[0]->value;
716 r = xstrtoint64(s, 10, &i);
718 STATE_ERROR(state, PATHX_ENUMBER);
723 state->value_pool[vind].number = i;
724 push_value(vind, state);
727 static void func_not(struct state *state, int nargs) {
731 struct value *v = pop_value(state);
732 if (v->tag == T_BOOLEAN) {
733 push_boolean_value(! v->boolval, state);
737 static struct regexp *
738 nodeset_as_regexp(struct info *info, struct nodeset *ns, int glob, int nocase) {
739 struct regexp *result = NULL;
740 struct regexp **rx = NULL;
743 for (int i = 0; i < ns->used; i++) {
744 if (ns->nodes[i]->value != NULL)
749 /* If the nodeset is empty, make sure we produce a regexp
750 * that never matches anything */
751 result = make_regexp_unescape(info, "[^\001-\7ff]", nocase);
753 if (ALLOC_N(rx, ns->used) < 0)
755 for (int i=0; i < ns->used; i++) {
756 if (ns->nodes[i]->value == NULL)
760 rx[i] = make_regexp_from_glob(info, ns->nodes[i]->value);
762 rx[i] = make_regexp_unescape(info, ns->nodes[i]->value, 0);
766 result = regexp_union_n(info, ns->used, rx);
771 for (int i=0; i < ns->used; i++)
772 unref(rx[i], regexp);
778 static void func_regexp_or_glob(struct state *state, int glob, int nocase) {
779 value_ind_t vind = make_value(T_REGEXP, state);
784 struct value *v = pop_value(state);
785 struct regexp *rx = NULL;
787 if (v->tag == T_STRING) {
789 rx = make_regexp_from_glob(state->error->info, v->string);
791 rx = make_regexp_unescape(state->error->info, v->string, nocase);
792 } else if (v->tag == T_NODESET) {
793 rx = nodeset_as_regexp(state->error->info, v->nodeset, glob, nocase);
803 state->value_pool[vind].regexp = rx;
804 r = regexp_compile(rx);
807 regexp_check(rx, &msg);
808 state->errmsg = strdup(msg);
809 STATE_ERROR(state, PATHX_EREGEXP);
812 push_value(vind, state);
815 static void func_regexp(struct state *state, int nargs) {
817 func_regexp_or_glob(state, 0, 0);
820 static void func_regexp_flag(struct state *state, int nargs) {
823 struct value *f = pop_value(state);
825 if (STREQ("i", f->string))
828 STATE_ERROR(state, PATHX_EREGEXPFLAG);
830 func_regexp_or_glob(state, 0, nocase);
833 static void func_glob(struct state *state, int nargs) {
835 func_regexp_or_glob(state, 1, 0);
838 static bool coerce_to_bool(struct value *v) {
841 return v->nodeset->used > 0;
847 return v->number > 0;
850 return strlen(v->string) > 0;
860 static int calc_eq_nodeset_nodeset(struct nodeset *ns1, struct nodeset *ns2,
862 for (int i1=0; i1 < ns1->used; i1++) {
863 struct tree *t1 = ns1->nodes[i1];
864 for (int i2=0; i2 < ns2->used; i2++) {
865 struct tree *t2 = ns2->nodes[i2];
867 if (!streqx(t1->value, t2->value))
870 if (streqx(t1->value, t2->value))
878 static int calc_eq_nodeset_string(struct nodeset *ns, const char *s,
880 for (int i=0; i < ns->used; i++) {
881 struct tree *t = ns->nodes[i];
883 if (!streqx(t->value, s))
886 if (streqx(t->value, s))
893 static void eval_eq(struct state *state, int neq) {
894 struct value *r = pop_value(state);
895 struct value *l = pop_value(state);
898 if (l->tag == T_NODESET && r->tag == T_NODESET) {
899 res = calc_eq_nodeset_nodeset(l->nodeset, r->nodeset, neq);
900 } else if (l->tag == T_NODESET) {
901 res = calc_eq_nodeset_string(l->nodeset, r->string, neq);
902 } else if (r->tag == T_NODESET) {
903 res = calc_eq_nodeset_string(r->nodeset, l->string, neq);
904 } else if (l->tag == T_NUMBER && r->tag == T_NUMBER) {
906 res = (l->number != r->number);
908 res = (l->number == r->number);
910 assert(l->tag == T_STRING);
911 assert(r->tag == T_STRING);
912 res = streqx(l->string, r->string);
918 push_boolean_value(res, state);
921 static void eval_arith(struct state *state, enum binary_op op) {
922 value_ind_t vind = make_value(T_NUMBER, state);
923 struct value *r = pop_value(state);
924 struct value *l = pop_value(state);
927 assert(l->tag == T_NUMBER);
928 assert(r->tag == T_NUMBER);
933 res = l->number + r->number;
934 else if (op == OP_MINUS)
935 res = l->number - r->number;
936 else if (op == OP_STAR)
937 res = l->number * r->number;
941 state->value_pool[vind].number = res;
942 push_value(vind, state);
945 static void eval_rel(struct state *state, bool greater, bool equal) {
949 /* We always check l < r or l <= r */
951 l = pop_value(state);
952 r = pop_value(state);
954 r = pop_value(state);
955 l = pop_value(state);
957 if (l->tag == T_NUMBER) {
959 res = (l->number <= r->number);
961 res = (l->number < r->number);
962 } else if (l->tag == T_STRING) {
963 int cmp = strcmp(l->string, r->string);
972 push_boolean_value(res, state);
975 static void eval_and_or(struct state *state, enum binary_op op) {
976 struct value *r = pop_value(state);
977 struct value *l = pop_value(state);
978 bool left = coerce_to_bool(l);
979 bool right = coerce_to_bool(r);
983 push_boolean_value(left && right, state);
985 push_boolean_value(left || right, state);
988 static bool eval_re_match_str(struct state *state, struct regexp *rx,
995 r = regexp_match(rx, str, strlen(str), 0, NULL);
997 STATE_ERROR(state, PATHX_EINTERNAL);
998 } else if (r == -3) {
999 /* We should never get this far; func_regexp should catch
1000 * invalid regexps */
1003 return r == strlen(str);
1006 static void eval_union(struct state *state) {
1007 value_ind_t vind = make_value(T_NODESET, state);
1008 struct value *r = pop_value(state);
1009 struct value *l = pop_value(state);
1010 struct nodeset *res = NULL;
1012 assert(l->tag == T_NODESET);
1013 assert(r->tag == T_NODESET);
1017 res = clone_nodeset(l->nodeset, state);
1019 for (int i=0; i < r->nodeset->used; i++) {
1020 ns_add(res, r->nodeset->nodes[i], state);
1021 if (HAS_ERROR(state))
1024 state->value_pool[vind].nodeset = res;
1025 push_value(vind, state);
1027 ns_clear_added(res);
1030 static void eval_concat_string(struct state *state) {
1031 value_ind_t vind = make_value(T_STRING, state);
1032 struct value *r = pop_value(state);
1033 struct value *l = pop_value(state);
1038 if (ALLOC_N(res, strlen(l->string) + strlen(r->string) + 1) < 0) {
1042 strcpy(res, l->string);
1043 strcat(res, r->string);
1044 state->value_pool[vind].string = res;
1045 push_value(vind, state);
1048 static void eval_concat_regexp(struct state *state) {
1049 value_ind_t vind = make_value(T_REGEXP, state);
1050 struct value *r = pop_value(state);
1051 struct value *l = pop_value(state);
1052 struct regexp *rx = NULL;
1056 rx = regexp_concat(state->error->info, l->regexp, r->regexp);
1062 state->value_pool[vind].regexp = rx;
1063 push_value(vind, state);
1066 static void eval_re_match(struct state *state, enum binary_op op) {
1067 struct value *rx = pop_value(state);
1068 struct value *v = pop_value(state);
1070 bool result = false;
1072 if (v->tag == T_STRING) {
1073 result = eval_re_match_str(state, rx->regexp, v->string);
1075 } else if (v->tag == T_NODESET) {
1076 for (int i=0; i < v->nodeset->used && result == false; i++) {
1077 struct tree *t = v->nodeset->nodes[i];
1078 result = eval_re_match_str(state, rx->regexp, t->value);
1082 if (op == OP_RE_NOMATCH)
1084 push_boolean_value(result, state);
1087 static void eval_binary(struct expr *expr, struct state *state) {
1088 eval_expr(expr->left, state);
1089 eval_expr(expr->right, state);
1100 eval_rel(state, false, false);
1103 eval_rel(state, false, true);
1106 eval_rel(state, true, false);
1109 eval_rel(state, true, true);
1112 if (expr->type == T_NUMBER)
1113 eval_arith(state, expr->op);
1114 else if (expr->type == T_STRING)
1115 eval_concat_string(state);
1116 else if (expr->type == T_REGEXP)
1117 eval_concat_regexp(state);
1121 eval_arith(state, expr->op);
1125 eval_and_or(state, expr->op);
1132 eval_re_match(state, expr->op);
1139 static void eval_app(struct expr *expr, struct state *state) {
1140 assert(expr->tag == E_APP);
1142 for (int i=0; i < expr->func->arity; i++) {
1143 eval_expr(expr->args[i], state);
1146 expr->func->impl(state, expr->func->arity);
1149 static bool eval_pred(struct expr *expr, struct state *state) {
1150 eval_expr(expr, state);
1153 struct value *v = pop_value(state);
1158 return (state->ctx_pos == v->number);
1160 return v->nodeset->used > 0;
1162 return streqv(state->ctx->value, v->string);
1169 /* Remove COUNT successive entries from NS. The first entry to remove is at
1171 static void ns_remove(struct nodeset *ns, int ind, int count) {
1174 memmove(ns->nodes + ind, ns->nodes + ind+count,
1175 sizeof(ns->nodes[0]) * (ns->used - (ind+count)));
1180 * Remove all nodes from NS for which one of PRED is false
1182 static void ns_filter(struct nodeset *ns, struct pred *predicates,
1183 struct state *state) {
1184 if (predicates == NULL)
1187 struct tree *old_ctx = state->ctx;
1188 uint old_ctx_len = state->ctx_len;
1189 uint old_ctx_pos = state->ctx_pos;
1191 for (int p=0; p < predicates->nexpr; p++) {
1192 int first_bad = -1; /* The index of the first non-matching node */
1193 state->ctx_len = ns->used;
1195 for (int i=0; i < ns->used; state->ctx_pos++) {
1196 state->ctx = ns->nodes[i];
1197 bool match = eval_pred(predicates->exprs[p], state);
1199 /* We remove non-matching nodes from NS in batches; this logic
1200 * makes sure that we only call ns_remove at the end of a run
1201 * of non-matching nodes
1204 if (first_bad >= 0) {
1205 ns_remove(ns, first_bad, i - first_bad);
1212 if (first_bad == -1)
1217 if (first_bad >= 0) {
1218 ns_remove(ns, first_bad, ns->used - first_bad);
1222 state->ctx = old_ctx;
1223 state->ctx_pos = old_ctx_pos;
1224 state->ctx_len = old_ctx_len;
1227 /* Return true if PRED is solely the predicate '[n]' as in 'foo[17]' */
1228 static bool position_pred(struct pred *pred) {
1229 return pred != NULL &&
1231 pred->exprs[0]->tag == E_VALUE &&
1232 pred->exprs[0]->type == T_NUMBER;
1235 /* Return the tree node at the position implied by STEP->PREDICATES. It is
1236 assumed and required that STEP->PREDICATES is actually a
1239 This method hand-optimizes the important case of a path expression like
1242 static struct tree *position_filter(struct nodeset *ns,
1244 struct state *state) {
1245 int value_ind = step->predicates->exprs[0]->value_ind;
1246 int number = state->value_pool[value_ind].number;
1249 for (int i=0; i < ns->used; i++) {
1250 for (struct tree *node = step_first(step, ns->nodes[i]);
1252 node = step_next(step, ns->nodes[i], node), pos++) {
1261 /* Return an array of nodesets, one for each step in the locpath.
1263 * On return, (*NS)[0] will contain state->ctx, and (*NS)[*MAXNS] will
1264 * contain the nodes that matched the entire locpath
1266 static void ns_from_locpath(struct locpath *lp, uint *maxns,
1267 struct nodeset ***ns,
1268 const struct nodeset *root,
1269 struct state *state) {
1270 struct tree *old_ctx = state->ctx;
1273 ensure(lp != NULL, state);
1276 list_for_each(step, lp->steps)
1278 if (ALLOC_N(*ns, *maxns+1) < 0) {
1279 STATE_ERROR(state, PATHX_ENOMEM);
1282 for (int i=0; i <= *maxns; i++) {
1283 (*ns)[i] = make_nodeset(state);
1284 if (HAS_ERROR(state))
1289 struct step *first_step = NULL;
1290 first_step = lp->steps;
1292 struct tree *root_tree;
1293 root_tree = step_root(first_step, state->ctx, state->root_ctx);
1294 ns_add((*ns)[0], root_tree, state);
1295 ns_clear_added((*ns)[0]);
1297 for (int i=0; i < root->used; i++)
1298 ns_add((*ns)[0], root->nodes[i], state);
1299 ns_clear_added((*ns)[0]);
1302 if (HAS_ERROR(state))
1306 list_for_each(step, lp->steps) {
1307 struct nodeset *work = (*ns)[cur_ns];
1308 struct nodeset *next = (*ns)[cur_ns + 1];
1309 if (position_pred(step->predicates)) {
1310 struct tree *node = position_filter(work, step, state);
1312 ns_add(next, node, state);
1313 ns_clear_added(next);
1316 for (int i=0; i < work->used; i++) {
1317 for (struct tree *node = step_first(step, work->nodes[i]);
1319 node = step_next(step, work->nodes[i], node)) {
1320 ns_add(next, node, state);
1323 ns_clear_added(next);
1324 ns_filter(next, step->predicates, state);
1325 if (HAS_ERROR(state))
1331 state->ctx = old_ctx;
1335 for (int i=0; i <= *maxns; i++)
1336 free_nodeset((*ns)[i]);
1339 state->ctx = old_ctx;
1343 static void eval_filter(struct expr *expr, struct state *state) {
1344 struct locpath *lp = expr->locpath;
1345 struct nodeset **ns = NULL;
1346 struct locpath_trace *lpt = state->locpath_trace;
1349 state->locpath_trace = NULL;
1350 if (expr->primary == NULL) {
1351 ns_from_locpath(lp, &maxns, &ns, NULL, state);
1353 eval_expr(expr->primary, state);
1355 value_ind_t primary_ind = pop_value_ind(state);
1356 struct value *primary = state->value_pool + primary_ind;
1357 assert(primary->tag == T_NODESET);
1358 ns_filter(primary->nodeset, expr->predicates, state);
1359 /* Evaluating predicates might have reallocated the value_pool */
1360 primary = state->value_pool + primary_ind;
1361 ns_from_locpath(lp, &maxns, &ns, primary->nodeset, state);
1365 value_ind_t vind = make_value(T_NODESET, state);
1367 state->value_pool[vind].nodeset = ns[maxns];
1368 push_value(vind, state);
1371 assert(lpt->ns == NULL);
1372 assert(lpt->lp == NULL);
1376 state->locpath_trace = lpt;
1378 for (int i=0; i < maxns; i++)
1379 free_nodeset(ns[i]);
1384 static struct value *lookup_var(const char *ident,
1385 const struct pathx_symtab *symtab) {
1386 list_for_each(tab, symtab) {
1387 if (STREQ(ident, tab->name))
1393 static void eval_var(struct expr *expr, struct state *state) {
1394 struct value *v = lookup_var(expr->ident, state->symtab);
1395 value_ind_t vind = clone_value(v, state);
1397 push_value(vind, state);
1400 static void eval_expr(struct expr *expr, struct state *state) {
1402 switch (expr->tag) {
1404 eval_filter(expr, state);
1407 eval_binary(expr, state);
1410 push_value(expr->value_ind, state);
1413 eval_var(expr, state);
1416 eval_app(expr, state);
1418 /* Do constant folding: replace the function application with
1419 * a reference to the value that resulted from evaluating it */
1420 for (int i=0; i < expr->func->arity; i++)
1421 free_expr(expr->args[i]);
1423 value_ind_t vind = state->values_used - 1;
1424 expr->tag = E_VALUE;
1425 expr->value_ind = state->values[vind];
1433 /*************************************************************************
1435 *************************************************************************/
1437 static void check_expr(struct expr *expr, struct state *state);
1439 /* Typecheck a list of predicates. A predicate is a function of
1440 * one of the following types:
1442 * T_NODESET -> T_BOOLEAN
1443 * T_NUMBER -> T_BOOLEAN (position test)
1444 * T_BOOLEAN -> T_BOOLEAN
1446 static void check_preds(struct pred *pred, struct state *state) {
1449 for (int i=0; i < pred->nexpr; i++) {
1450 struct expr *e = pred->exprs[i];
1451 check_expr(e, state);
1453 if (e->type != T_NODESET && e->type != T_NUMBER &&
1454 e->type != T_BOOLEAN && e->type != T_STRING) {
1455 STATE_ERROR(state, PATHX_ETYPE);
1461 static void check_filter(struct expr *expr, struct state *state) {
1462 assert(expr->tag == E_FILTER);
1463 struct locpath *locpath = expr->locpath;
1465 if (expr->primary != NULL) {
1466 check_expr(expr->primary, state);
1467 if (expr->primary->type != T_NODESET) {
1468 STATE_ERROR(state, PATHX_ETYPE);
1471 check_preds(expr->predicates, state);
1474 list_for_each(s, locpath->steps) {
1475 check_preds(s->predicates, state);
1478 expr->type = T_NODESET;
1481 static void check_app(struct expr *expr, struct state *state) {
1482 assert(expr->tag == E_APP);
1484 for (int i=0; i < expr->func->arity; i++) {
1485 check_expr(expr->args[i], state);
1490 for (f=0; f < ARRAY_CARDINALITY(builtin_funcs); f++) {
1491 const struct func *fn = builtin_funcs + f;
1492 if (STRNEQ(expr->func->name, fn->name))
1494 if (expr->func->arity != fn->arity)
1498 for (int i=0; i < expr->func->arity; i++) {
1499 if (expr->args[i]->type != fn->arg_types[i]) {
1508 if (f < ARRAY_CARDINALITY(builtin_funcs)) {
1509 expr->func = builtin_funcs + f;
1510 expr->type = expr->func->type;
1511 expr->fold = expr->func->pure;
1513 /* We only do constant folding for invocations of pure functions
1514 * whose arguments are literal values. That misses opportunities
1515 * for constant folding, e.g., "regexp('foo' + 'bar')" but is
1516 * a bit simpler than doing full tracking of constants
1518 for (int i=0; i < expr->func->arity; i++) {
1519 if (expr->args[i]->tag != E_VALUE)
1524 STATE_ERROR(state, PATHX_ETYPE);
1528 /* Check the binary operators. Type rules:
1530 * '=', '!=' : T_NODESET -> T_NODESET -> T_BOOLEAN
1531 * T_STRING -> T_NODESET -> T_BOOLEAN
1532 * T_NODESET -> T_STRING -> T_BOOLEAN
1533 * T_NUMBER -> T_NUMBER -> T_BOOLEAN
1536 * '<', '<=' : T_NUMBER -> T_NUMBER -> T_BOOLEAN
1537 * T_STRING -> T_STRING -> T_BOOLEAN
1538 * '+' : T_NUMBER -> T_NUMBER -> T_NUMBER
1539 * T_STRING -> T_STRING -> T_STRING
1540 * T_REGEXP -> T_REGEXP -> T_REGEXP
1541 * '+', '-', '*': T_NUMBER -> T_NUMBER -> T_NUMBER
1543 * 'and', 'or': T_BOOLEAN -> T_BOOLEAN -> T_BOOLEAN
1544 * '=~', '!~' : T_STRING -> T_REGEXP -> T_BOOLEAN
1545 * T_NODESET -> T_REGEXP -> T_BOOLEAN
1547 * '|' : T_NODESET -> T_NODESET -> T_NODESET
1549 * Any type can be coerced to T_BOOLEAN (see coerce_to_bool)
1551 static void check_binary(struct expr *expr, struct state *state) {
1552 check_expr(expr->left, state);
1553 check_expr(expr->right, state);
1556 enum type l = expr->left->type;
1557 enum type r = expr->right->type;
1564 ok = ((l == T_NODESET || l == T_STRING)
1565 && (r == T_NODESET || r == T_STRING))
1566 || (l == T_NUMBER && r == T_NUMBER);;
1573 ok = (l == T_NUMBER && r == T_NUMBER)
1574 || (l == T_STRING && r == T_STRING);
1578 ok = (l == r && (l == T_NUMBER || l == T_STRING || l == T_REGEXP));
1583 ok = (l == T_NUMBER && r == T_NUMBER);
1587 ok = (l == T_NODESET && r == T_NODESET);
1597 ok = ((l == T_STRING || l == T_NODESET) && r == T_REGEXP);
1604 STATE_ERROR(state, PATHX_ETYPE);
1610 static void check_var(struct expr *expr, struct state *state) {
1611 struct value *v = lookup_var(expr->ident, state->symtab);
1613 STATE_ERROR(state, PATHX_ENOVAR);
1616 expr->type = v->tag;
1619 /* Typecheck an expression */
1620 static void check_expr(struct expr *expr, struct state *state) {
1624 check_filter(expr, state);
1627 check_binary(expr, state);
1630 expr->type = expr_value(expr, state)->tag;
1633 check_var(expr, state);
1636 check_app(expr, state);
1644 * Utility functions for the parser
1647 static void skipws(struct state *state) {
1648 while (isspace(*state->pos)) state->pos += 1;
1651 static int match(struct state *state, char m) {
1654 if (*state->pos == '\0')
1656 if (*state->pos == m) {
1663 static int peek(struct state *state, const char *chars) {
1664 return strchr(chars, *state->pos) != NULL;
1667 /* Return 1 if STATE->POS starts with TOKEN, followed by optional
1668 * whitespace, followed by FOLLOW. In that case, STATE->POS is set to the
1669 * first character after FOLLOW. Return 0 otherwise and leave STATE->POS
1672 static int looking_at(struct state *state, const char *token,
1673 const char *follow) {
1674 if (STREQLEN(state->pos, token, strlen(token))) {
1675 const char *p = state->pos + strlen(token);
1676 while (isspace(*p)) p++;
1677 if (STREQLEN(p, follow, strlen(follow))) {
1678 state->pos = p + strlen(follow);
1685 /*************************************************************************
1687 *************************************************************************/
1689 static void parse_expr(struct state *state);
1691 static struct expr* pop_expr(struct state *state) {
1692 if (state->exprs_used > 0) {
1693 state->exprs_used -= 1;
1694 return state->exprs[state->exprs_used];
1696 STATE_ERROR(state, PATHX_EINTERNAL);
1702 static void push_expr(struct expr *expr, struct state *state) {
1703 if (state->exprs_used >= state->exprs_size) {
1704 size_t new_size = 2*state->exprs_size;
1705 if (new_size == 0) new_size = 8;
1706 if (REALLOC_N(state->exprs, new_size) < 0) {
1710 state->exprs_size = new_size;
1712 state->exprs[state->exprs_used++] = expr;
1715 static void push_new_binary_op(enum binary_op op, struct state *state) {
1716 struct expr *expr = NULL;
1717 if (ALLOC(expr) < 0) {
1722 expr->tag = E_BINARY;
1724 expr->right = pop_expr(state);
1725 expr->left = pop_expr(state);
1726 push_expr(expr, state);
1729 int pathx_escape_name(const char *in, char **out) {
1731 int num_to_escape = 0;
1736 for (p = in; *p; p++) {
1737 if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1741 if (num_to_escape == 0)
1744 if (ALLOC_N(*out, strlen(in) + num_to_escape + 1) < 0)
1747 for (p = in, s = *out; *p; p++) {
1748 if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1756 /* Return true if POS is preceded by an odd number of backslashes, i.e., if
1757 * POS is escaped. Stop the search when we get to START */
1758 static bool backslash_escaped(const char *pos, const char *start) {
1760 while (pos-- > start && *pos == '\\') {
1767 * NameNoWS ::= [^][|/\= \t\n] | \\.
1768 * NameWS ::= [^][|/\=] | \\.
1769 * Name ::= NameNoWS NameWS* NameNoWS | NameNoWS
1771 static char *parse_name(struct state *state) {
1772 const char *s = state->pos;
1775 /* Advance state->pos until it points to the first character that is
1776 * not part of a name. */
1777 while (*state->pos != '\0' && strchr(name_follow, *state->pos) == NULL) {
1778 /* Since we allow spaces in names, we need to avoid gobbling up
1779 * stuff that is in follow(Name), e.g. 'or' so that things like
1780 * [name1 or name2] still work. In other words, we'll parse 'x frob
1781 * y' as one name, but for 'x or y', we consider 'x' a name in its
1783 if (STREQLEN(state->pos, " or ", strlen(" or ")) ||
1784 STREQLEN(state->pos, " and ", strlen(" and ")))
1787 if (*state->pos == '\\') {
1789 if (*state->pos == '\0') {
1790 STATE_ERROR(state, PATHX_ENAME);
1797 /* Strip trailing white space. Make sure we respect escaped whitespace
1798 * and don't strip it as in "x\\ " */
1799 if (state->pos > s) {
1801 while (isspace(*state->pos) && state->pos > s
1802 && !backslash_escaped(state->pos, s))
1807 if (state->pos == s) {
1808 STATE_ERROR(state, PATHX_ENAME);
1812 result = strndup(s, state->pos - s);
1813 if (result == NULL) {
1819 for (char *t = result; *t != '\0'; t++, p++) {
1830 * Predicate ::= "[" Expr "]" *
1832 static struct pred *parse_predicates(struct state *state) {
1833 struct pred *pred = NULL;
1836 while (match(state, L_BRACK)) {
1841 if (! match(state, R_BRACK)) {
1842 STATE_ERROR(state, PATHX_EPRED);
1851 if (ALLOC(pred) < 0) {
1855 pred->nexpr = nexpr;
1857 if (ALLOC_N(pred->exprs, nexpr) < 0) {
1863 for (int i = nexpr - 1; i >= 0; i--)
1864 pred->exprs[i] = pop_expr(state);
1870 * Step ::= AxisSpecifier NameTest Predicate* | '.' | '..'
1871 * AxisSpecifier ::= AxisName '::' | <epsilon>
1872 * AxisName ::= 'ancestor'
1873 * | 'ancestor-or-self'
1876 * | 'descendant-or-self'
1881 static struct step *parse_step(struct state *state) {
1883 int explicit_axis = 0, allow_predicates = 1;
1885 if (ALLOC(step) < 0) {
1891 for (int i = 0; i < ARRAY_CARDINALITY(axis_names); i++) {
1892 if (looking_at(state, axis_names[i], "::")) {
1899 if (! match(state, '*')) {
1900 step->name = parse_name(state);
1901 if (HAS_ERROR(state))
1903 if (! explicit_axis) {
1904 if (STREQ(step->name, ".") || STREQ(step->name, "..")) {
1905 step->axis = STREQ(step->name, ".") ? SELF : PARENT;
1907 allow_predicates = 0;
1912 if (allow_predicates) {
1913 step->predicates = parse_predicates(state);
1914 if (HAS_ERROR(state))
1925 static struct step *make_step(enum axis axis, struct state *state) {
1926 struct step *result = NULL;
1928 if (ALLOC(result) < 0) {
1932 result->axis = axis;
1937 * RelativeLocationPath ::= Step
1938 * | RelativeLocationPath '/' Step
1939 * | AbbreviatedRelativeLocationPath
1940 * AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step
1942 * The above is the same as
1943 * RelativeLocationPath ::= Step ('/' Step | '//' Step)*
1945 static struct locpath *
1946 parse_relative_location_path(struct state *state) {
1947 struct step *step = NULL;
1948 struct locpath *locpath = NULL;
1950 step = parse_step(state);
1951 if (HAS_ERROR(state))
1954 if (ALLOC(locpath) < 0) {
1958 list_append(locpath->steps, step);
1961 while (match(state, '/')) {
1962 if (*state->pos == '/') {
1964 step = make_step(DESCENDANT_OR_SELF, state);
1969 list_append(locpath->steps, step);
1971 step = parse_step(state);
1972 if (HAS_ERROR(state))
1974 list_append(locpath->steps, step);
1981 free_locpath(locpath);
1986 * LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
1987 * AbsoluteLocationPath ::= '/' RelativeLocationPath?
1988 * | AbbreviatedAbsoluteLocationPath
1989 * AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath
1992 static void parse_location_path(struct state *state) {
1993 struct expr *expr = NULL;
1994 struct locpath *locpath = NULL;
1996 if (match(state, '/')) {
1997 if (*state->pos == '/') {
1999 locpath = parse_relative_location_path(state);
2000 if (HAS_ERROR(state))
2002 struct step *step = make_step(DESCENDANT_OR_SELF, state);
2003 if (HAS_ERROR(state))
2005 list_cons(locpath->steps, step);
2007 if (*state->pos != '\0') {
2008 locpath = parse_relative_location_path(state);
2010 if (ALLOC(locpath) < 0)
2013 struct step *step = make_step(ROOT, state);
2014 if (HAS_ERROR(state)) {
2018 list_cons(locpath->steps, step);
2021 locpath = parse_relative_location_path(state);
2024 if (ALLOC(expr) < 0)
2026 expr->tag = E_FILTER;
2027 expr->locpath = locpath;
2028 push_expr(expr, state);
2035 free_locpath(locpath);
2040 * Number ::= /[0-9]+/
2042 static void parse_number(struct state *state) {
2043 struct expr *expr = NULL;
2048 val = strtoul(state->pos, &end, 10);
2049 if (errno || end == state->pos || (int) val != val) {
2050 STATE_ERROR(state, PATHX_ENUMBER);
2056 if (ALLOC(expr) < 0)
2058 expr->tag = E_VALUE;
2059 expr->value_ind = make_value(T_NUMBER, state);
2060 if (HAS_ERROR(state))
2062 expr_value(expr, state)->number = val;
2064 push_expr(expr, state);
2075 * Literal ::= '"' /[^"]* / '"' | "'" /[^']* / "'"
2077 static void parse_literal(struct state *state) {
2080 struct expr *expr = NULL;
2082 if (*state->pos == '"')
2084 else if (*state->pos == '\'')
2087 STATE_ERROR(state, PATHX_ESTRING);
2093 while (*state->pos != '\0' && *state->pos != delim) state->pos += 1;
2095 if (*state->pos != delim) {
2096 STATE_ERROR(state, PATHX_EDELIM);
2101 if (ALLOC(expr) < 0)
2103 expr->tag = E_VALUE;
2104 expr->value_ind = make_value(T_STRING, state);
2105 if (HAS_ERROR(state))
2107 expr_value(expr, state)->string = strndup(s, state->pos - s - 1);
2108 if (expr_value(expr, state)->string == NULL)
2111 push_expr(expr, state);
2122 * FunctionCall ::= Name '(' ( Expr ( ',' Expr )* )? ')'
2124 static void parse_function_call(struct state *state) {
2125 const struct func *func = NULL;
2126 struct expr *expr = NULL;
2127 int nargs = 0, find = 0;
2129 for (; find < ARRAY_CARDINALITY(builtin_funcs); find++) {
2130 if (looking_at(state, builtin_funcs[find].name, "(")) {
2131 func = builtin_funcs + find;
2136 STATE_ERROR(state, PATHX_ENAME);
2140 if (! match(state, ')')) {
2145 } while (match(state, ','));
2147 if (! match(state, ')')) {
2148 STATE_ERROR(state, PATHX_EPAREN);
2153 int found = 0; /* Whether there is a builtin matching in name and arity */
2154 for (int i=find; i < ARRAY_CARDINALITY(builtin_funcs); i++) {
2155 if (STRNEQ(func->name, builtin_funcs[i].name))
2157 if (builtin_funcs[i].arity == nargs) {
2158 func = builtin_funcs + i;
2165 STATE_ERROR(state, PATHX_EARITY);
2169 if (ALLOC(expr) < 0) {
2174 if (ALLOC_N(expr->args, nargs) < 0) {
2180 for (int i = nargs - 1; i >= 0; i--)
2181 expr->args[i] = pop_expr(state);
2183 push_expr(expr, state);
2187 * VariableReference ::= '$' /[a-zA-Z_][a-zA-Z0-9_]* /
2189 * The '$' is consumed by parse_primary_expr
2191 static void parse_var(struct state *state) {
2192 const char *id = state->pos;
2193 struct expr *expr = NULL;
2195 if (!isalpha(*id) && *id != '_') {
2196 STATE_ERROR(state, PATHX_ENAME);
2200 while (isalpha(*id) || isdigit(*id) || *id == '_')
2203 if (ALLOC(expr) < 0)
2206 expr->ident = strndup(state->pos, id - state->pos);
2207 if (expr->ident == NULL)
2210 push_expr(expr, state);
2220 * PrimaryExpr ::= Literal
2223 * | VariableReference
2227 static void parse_primary_expr(struct state *state) {
2228 if (peek(state, "'\"")) {
2229 parse_literal(state);
2230 } else if (peek(state, "0123456789")) {
2231 parse_number(state);
2232 } else if (match(state, '(')) {
2235 if (! match(state, ')')) {
2236 STATE_ERROR(state, PATHX_EPAREN);
2239 } else if (match(state, '$')) {
2242 parse_function_call(state);
2246 static int looking_at_primary_expr(struct state *state) {
2247 const char *s = state->pos;
2248 /* Is it a Number, Literal or VariableReference ? */
2249 if (peek(state, "$'\"0123456789"))
2252 /* Or maybe a function call, i.e. a word followed by a '(' ?
2253 * Note that our function names are only [a-zA-Z]+
2255 while (*s != '\0' && isalpha(*s)) s++;
2256 while (*s != '\0' && isspace(*s)) s++;
2261 * PathExpr ::= LocationPath
2263 * | FilterExpr '/' RelativeLocationPath
2264 * | FilterExpr '//' RelativeLocationPath
2266 * FilterExpr ::= PrimaryExpr Predicate
2268 * The grammar is ambiguous here: the expression '42' can either be the
2269 * number 42 (a PrimaryExpr) or the RelativeLocationPath 'child::42'. The
2270 * reason for this ambiguity is that we allow node names like '42' in the
2271 * tree; rather than forbid them, we resolve the ambiguity by always
2272 * parsing '42' as a number, and requiring that the user write the
2273 * RelativeLocationPath in a different form, e.g. 'child::42' or './42'.
2275 static void parse_path_expr(struct state *state) {
2276 struct expr *expr = NULL;
2277 struct pred *predicates = NULL;
2278 struct locpath *locpath = NULL;
2280 if (looking_at_primary_expr(state)) {
2281 parse_primary_expr(state);
2283 predicates = parse_predicates(state);
2285 if (match(state, '/')) {
2286 if (match(state, '/')) {
2287 locpath = parse_relative_location_path(state);
2288 if (HAS_ERROR(state))
2291 struct step *step = make_step(DESCENDANT_OR_SELF, state);
2292 if (HAS_ERROR(state))
2294 list_cons(locpath->steps, step);
2296 if (*state->pos == '\0') {
2297 STATE_ERROR(state, PATHX_EEND);
2300 locpath = parse_relative_location_path(state);
2303 /* A PathExpr without predicates and locpath is
2304 * just a PrimaryExpr
2306 if (predicates == NULL && locpath == NULL)
2308 /* To make evaluation easier, we parse something like
2309 * $var[pred] as $var[pred]/.
2311 if (locpath == NULL) {
2312 if (ALLOC(locpath) < 0)
2314 if (ALLOC(locpath->steps) < 0)
2316 locpath->steps->axis = SELF;
2318 if (ALLOC(expr) < 0)
2320 expr->tag = E_FILTER;
2321 expr->predicates = predicates;
2322 expr->primary = pop_expr(state);
2323 expr->locpath = locpath;
2324 push_expr(expr, state);
2326 parse_location_path(state);
2331 free_pred(predicates);
2332 free_locpath(locpath);
2337 * UnionExpr ::= PathExpr ('|' PathExpr)*
2339 static void parse_union_expr(struct state *state) {
2340 parse_path_expr(state);
2342 while (match(state, '|')) {
2343 parse_path_expr(state);
2345 push_new_binary_op(OP_UNION, state);
2350 * MultiplicativeExpr ::= UnionExpr ('*' UnionExpr)*
2352 static void parse_multiplicative_expr(struct state *state) {
2353 parse_union_expr(state);
2355 while (match(state, '*')) {
2356 parse_union_expr(state);
2358 push_new_binary_op(OP_STAR, state);
2363 * AdditiveExpr ::= MultiplicativeExpr (AdditiveOp MultiplicativeExpr)*
2364 * AdditiveOp ::= '+' | '-'
2366 static void parse_additive_expr(struct state *state) {
2367 parse_multiplicative_expr(state);
2369 while (*state->pos == '+' || *state->pos == '-') {
2370 enum binary_op op = (*state->pos == '+') ? OP_PLUS : OP_MINUS;
2373 parse_multiplicative_expr(state);
2375 push_new_binary_op(op, state);
2380 * RelationalExpr ::= AdditiveExpr (RelationalOp AdditiveExpr)?
2381 * RelationalOp ::= ">" | "<" | ">=" | "<="
2383 static void parse_relational_expr(struct state *state) {
2384 parse_additive_expr(state);
2386 if (*state->pos == '<' || *state->pos == '>') {
2387 enum binary_op op = (*state->pos == '<') ? OP_LT : OP_GT;
2389 if (*state->pos == '=') {
2390 op = (op == OP_LT) ? OP_LE : OP_GE;
2394 parse_additive_expr(state);
2396 push_new_binary_op(op, state);
2401 * EqualityExpr ::= RelationalExpr (EqualityOp RelationalExpr)? | ReMatchExpr
2402 * EqualityOp ::= "=" | "!="
2403 * ReMatchExpr ::= RelationalExpr MatchOp RelationalExpr
2404 * MatchOp ::= "=~" | "!~"
2406 static void parse_equality_expr(struct state *state) {
2407 parse_relational_expr(state);
2409 if ((*state->pos == '=' || *state->pos == '!') && state->pos[1] == '~') {
2410 enum binary_op op = (*state->pos == '=') ? OP_RE_MATCH : OP_RE_NOMATCH;
2413 parse_relational_expr(state);
2415 push_new_binary_op(op, state);
2416 } else if (*state->pos == '=' ||
2417 (*state->pos == '!' && state->pos[1] == '=')) {
2418 enum binary_op op = (*state->pos == '=') ? OP_EQ : OP_NEQ;
2419 state->pos += (op == OP_EQ) ? 1 : 2;
2421 parse_relational_expr(state);
2423 push_new_binary_op(op, state);
2428 * AndExpr ::= EqualityExpr ('and' EqualityExpr)*
2430 static void parse_and_expr(struct state *state) {
2431 parse_equality_expr(state);
2433 while (*state->pos == 'a' && state->pos[1] == 'n'
2434 && state->pos[2] == 'd') {
2437 parse_equality_expr(state);
2439 push_new_binary_op(OP_AND, state);
2444 * OrExpr ::= AndExpr ('or' AndExpr)*
2446 static void parse_or_expr(struct state *state) {
2447 parse_and_expr(state);
2449 while (*state->pos == 'o' && state->pos[1] == 'r') {
2452 parse_and_expr(state);
2454 push_new_binary_op(OP_OR, state);
2461 static void parse_expr(struct state *state) {
2463 parse_or_expr(state);
2466 static void store_error(struct pathx *pathx) {
2467 const char *pathx_msg = NULL;
2468 const char *path = pathx->state->txt;
2469 const pathx_errcode_t errcode = pathx->state->errcode;
2470 struct error *err = pathx->state->error;
2472 char *pos_str = pathx->state->errmsg;
2473 pathx->state->errmsg = NULL;
2475 if (err == NULL || errcode == PATHX_NOERROR || err->code != AUG_NOERROR)
2480 err->code = AUG_ENOMEM;
2483 err->code = AUG_EMMATCH;
2485 case PATHX_ENOMATCH:
2486 err->code = AUG_ENOMATCH;
2489 err->code = AUG_EPATHX;
2493 /* We only need details for pathx syntax errors */
2494 if (err->code != AUG_EPATHX)
2498 pathx_msg = pathx_error(pathx, NULL, &pos);
2500 bool has_msg = pos_str != NULL;
2501 int pos_str_len = pos_str == NULL ? 0 : strlen(pos_str);
2502 if (REALLOC_N(pos_str, pos_str_len + strlen(path) + 8) >= 0) {
2504 strcat(pos_str, " in ");
2505 strncat(pos_str, path, pos);
2507 /* initialize pos_str explicitly, path might be "" */
2509 strncat(pos_str, path, pos);
2511 strcat(pos_str, "|=|");
2512 strcat(pos_str, path + pos);
2515 err->minor = errcode;
2516 err->details = pos_str;
2518 err->minor_details = pathx_msg;
2521 int pathx_parse(const struct tree *tree,
2525 struct pathx_symtab *symtab,
2526 struct tree *root_ctx,
2527 struct pathx **pathx) {
2528 struct state *state = NULL;
2532 if (ALLOC(*pathx) < 0)
2535 (*pathx)->origin = (struct tree *) tree;
2538 if (ALLOC((*pathx)->state) < 0)
2540 state = (*pathx)->state;
2542 state->errcode = PATHX_NOERROR;
2543 state->errmsg = NULL;
2546 state->symtab = symtab;
2547 state->root_ctx = root_ctx;
2550 if (ALLOC_N(state->value_pool, 8) < 0) {
2554 state->value_pool_size = 8;
2555 state->value_pool[0].tag = T_BOOLEAN;
2556 state->value_pool[0].boolval = 0;
2557 state->value_pool[1].tag = T_BOOLEAN;
2558 state->value_pool[1].boolval = 1;
2559 state->value_pool_used = 2;
2563 if (HAS_ERROR(state))
2565 if (state->pos != state->txt + strlen(state->txt)) {
2566 STATE_ERROR(state, PATHX_EEND);
2570 if (state->exprs_used != 1) {
2571 STATE_ERROR(state, PATHX_EINTERNAL);
2576 check_expr(state->exprs[0], state);
2577 if (HAS_ERROR(state))
2580 if (need_nodeset && state->exprs[0]->type != T_NODESET) {
2581 STATE_ERROR(state, PATHX_ETYPE);
2586 store_error(*pathx);
2587 return state->errcode;
2592 err->code = AUG_ENOMEM;
2593 return PATHX_ENOMEM;
2596 /*************************************************************************
2597 * Searching in the tree
2598 *************************************************************************/
2600 static bool step_matches(struct step *step, struct tree *tree) {
2601 if (step->name == NULL) {
2602 return step->axis == ROOT || tree->label != NULL;
2604 return streqx(step->name, tree->label);
2608 static struct tree *tree_prev(struct tree *pos) {
2609 struct tree *node = NULL;
2610 if (pos != pos->parent->children) {
2611 for (node = pos->parent->children;
2618 /* When the first step doesn't begin with ROOT then use relative root context
2620 static struct tree *step_root(struct step *step, struct tree *ctx,
2621 struct tree *root_ctx) {
2622 struct tree *node = NULL;
2623 switch (step->axis) {
2629 case PRECEDING_SIBLING:
2630 case FOLLOWING_SIBLING:
2631 /* only use root_ctx when ctx is the absolute tree root */
2632 if (ctx == ctx->parent && root_ctx != NULL)
2638 case DESCENDANT_OR_SELF:
2649 static struct tree *step_first(struct step *step, struct tree *ctx) {
2650 struct tree *node = NULL;
2651 switch (step->axis) {
2653 case DESCENDANT_OR_SELF:
2658 node = ctx->children;
2665 while (ctx->parent != ctx)
2669 case PRECEDING_SIBLING:
2670 node = tree_prev(ctx);
2672 case FOLLOWING_SIBLING:
2680 if (step_matches(step, node))
2682 return step_next(step, ctx, node);
2685 static struct tree *step_next(struct step *step, struct tree *ctx,
2686 struct tree *node) {
2687 while (node != NULL) {
2688 switch (step->axis) {
2696 case DESCENDANT_OR_SELF:
2697 if (node->children != NULL) {
2698 node = node->children;
2700 while (node->next == NULL && node != ctx)
2701 node = node->parent;
2713 if (node->parent == node)
2716 node = node->parent;
2718 case PRECEDING_SIBLING:
2719 node = tree_prev(node);
2721 case FOLLOWING_SIBLING:
2727 if (node != NULL && step_matches(step, node))
2733 static struct value *pathx_eval(struct pathx *pathx) {
2734 struct state *state = pathx->state;
2735 state->ctx = pathx->origin;
2738 eval_expr(state->exprs[0], state);
2739 if (HAS_ERROR(state))
2742 if (state->values_used != 1) {
2743 STATE_ERROR(state, PATHX_EINTERNAL);
2746 return pop_value(state);
2749 struct tree *pathx_next(struct pathx *pathx) {
2750 if (pathx->node + 1 < pathx->nodeset->used)
2751 return pathx->nodeset->nodes[++pathx->node];
2755 /* Find the first node in TREE matching PATH. */
2756 struct tree *pathx_first(struct pathx *pathx) {
2757 if (pathx->nodeset == NULL) {
2758 struct value *v = pathx_eval(pathx);
2760 if (HAS_ERROR(pathx->state))
2762 assert(v->tag == T_NODESET);
2763 pathx->nodeset = v->nodeset;
2766 if (pathx->nodeset->used == 0)
2769 return pathx->nodeset->nodes[0];
2775 /* Find a node in the tree that matches the longest prefix of PATH.
2777 * Return 1 if a node was found that exactly matches PATH, 0 if an incomplete
2778 * prefix matches, and -1 if more than one node in the tree match.
2780 * TMATCH is set to the tree node that matches, and SMATCH to the next step
2781 * after the one where TMATCH matched. If no node matches or multiple nodes
2782 * at the same depth match, TMATCH and SMATCH will be NULL. When exactly
2783 * one node matches, TMATCH will be that node, and SMATCH will be NULL.
2785 static int locpath_search(struct locpath_trace *lpt,
2786 struct tree **tmatch, struct step **smatch) {
2790 for (last=lpt->maxns; last >= 0 && lpt->ns[last]->used == 0; last--);
2792 *smatch = lpt->lp->steps;
2796 if (lpt->ns[last]->used > 1) {
2801 *tmatch = lpt->ns[last]->nodes[0];
2802 *smatch = lpt->lp->steps;
2803 for (int i=0; i < last; i++)
2804 *smatch = (*smatch)->next;
2806 for (int i=0; i < lpt->maxns; i++)
2807 free_nodeset(lpt->ns[i]);
2812 /* Expand the tree ROOT so that it contains all components of PATH. PATH
2813 * must have been initialized against ROOT by a call to PATH_FIND_ONE.
2815 * Return the first segment that was created by this operation, or NULL on
2818 int pathx_expand_tree(struct pathx *path, struct tree **tree) {
2820 struct step *step = NULL;
2821 struct locpath_trace lpt;
2822 struct tree *first_child = NULL;
2823 struct value *v = NULL;
2826 path->state->locpath_trace = &lpt;
2827 v = pathx_eval(path);
2828 path->state->locpath_trace = NULL;
2829 if (HAS_ERROR(path->state))
2832 if (lpt.maxns == 0) {
2833 if (v->tag != T_NODESET || v->nodeset->used == 0) {
2834 STATE_ERROR(path->state, PATHX_ENOMATCH);
2837 if (v->nodeset->used > 1)
2839 *tree = v->nodeset->nodes[0];
2843 *tree = path->origin;
2844 r = locpath_search(&lpt, tree, &step);
2846 STATE_ERROR(path->state, PATHX_EMMATCH);
2853 struct tree *parent = *tree;
2855 parent = path->origin;
2857 list_for_each(s, step) {
2858 if (s->name == NULL || s->axis != CHILD)
2860 struct tree *t = make_tree(strdup(s->name), NULL, parent, NULL);
2861 if (first_child == NULL)
2863 if (t == NULL || t->label == NULL)
2865 list_append(parent->children, t);
2869 while (first_child->children != NULL)
2870 first_child = first_child->children;
2872 *tree = first_child;
2876 if (first_child != NULL) {
2877 list_remove(first_child, first_child->parent->children);
2878 free_tree(first_child);
2885 int pathx_find_one(struct pathx *path, struct tree **tree) {
2886 *tree = pathx_first(path);
2887 if (HAS_ERROR(path->state))
2889 return path->nodeset->used;
2892 struct error *err_of_pathx(struct pathx *px) {
2893 return px->state->error;
2896 const char *pathx_error(struct pathx *path, const char **txt, int *pos) {
2897 int errcode = PATHX_ENOMEM;
2900 if (path->state->errcode < ARRAY_CARDINALITY(errcodes))
2901 errcode = path->state->errcode;
2903 errcode = PATHX_EINTERNAL;
2906 *txt = path->state->txt;
2909 *pos = path->state->pos - path->state->txt;
2911 return errcodes[errcode];
2917 static struct pathx_symtab
2918 *make_symtab(struct pathx_symtab *symtab, const char *name,
2919 struct value *value)
2921 struct pathx_symtab *new;
2928 if (ALLOC(new) < 0) {
2934 if (symtab == NULL) {
2937 new->next = symtab->next;
2943 void free_symtab(struct pathx_symtab *symtab) {
2945 while (symtab != NULL) {
2946 struct pathx_symtab *del = symtab;
2949 release_value(del->value);
2955 struct pathx_symtab *pathx_get_symtab(struct pathx *pathx) {
2956 return pathx->state->symtab;
2959 static int pathx_symtab_set(struct pathx_symtab **symtab,
2960 const char *name, struct value *v) {
2963 list_for_each(tab, *symtab) {
2964 if (STREQ(tab->name, name)) {
2965 release_value(tab->value);
2974 struct pathx_symtab *new = NULL;
2976 new = make_symtab(*symtab, name, v);
2986 int pathx_symtab_define(struct pathx_symtab **symtab,
2987 const char *name, struct pathx *px) {
2989 struct value *value = NULL, *v = NULL;
2990 struct state *state = px->state;
2992 value = pathx_eval(px);
2993 if (HAS_ERROR(px->state))
3002 value->tag = T_BOOLEAN;
3004 r = pathx_symtab_set(symtab, name, v);
3010 if (v->tag == T_NODESET)
3011 return v->nodeset->used;
3015 release_value(value);
3023 int pathx_symtab_undefine(struct pathx_symtab **symtab, const char *name) {
3024 struct pathx_symtab *del = NULL;
3027 del != NULL && !STREQ(del->name, name);
3031 list_remove(del, *symtab);
3036 int pathx_symtab_assign_tree(struct pathx_symtab **symtab,
3037 const char *name, struct tree *tree) {
3038 struct value *v = NULL;
3045 if (ALLOC(v->nodeset) < 0)
3047 if (ALLOC_N(v->nodeset->nodes, 1) < 0)
3049 v->nodeset->used = 1;
3050 v->nodeset->size = 1;
3051 v->nodeset->nodes[0] = tree;
3053 r = pathx_symtab_set(symtab, name, v);
3064 pathx_symtab_count(const struct pathx_symtab *symtab, const char *name) {
3065 struct value *v = lookup_var(name, symtab);
3067 if (v == NULL || v->tag != T_NODESET)
3070 return v->nodeset->used;
3074 pathx_symtab_get_tree(struct pathx_symtab *symtab,
3075 const char *name, int i) {
3076 struct value *v = lookup_var(name, symtab);
3079 if (v->tag != T_NODESET)
3081 if (i >= v->nodeset->used)
3083 return v->nodeset->nodes[i];
3086 void pathx_symtab_remove_descendants(struct pathx_symtab *symtab,
3087 const struct tree *tree) {
3088 list_for_each(tab, symtab) {
3089 if (tab->value->tag != T_NODESET)
3091 struct nodeset *ns = tab->value->nodeset;
3092 for (int i=0; i < ns->used;) {
3093 struct tree *t = ns->nodes[i];
3094 while (t != t->parent && t != tree)
3097 ns_remove(ns, i, 1);
3106 * indent-tabs-mode: nil