2 * pathx.c: handling path expressions
4 * Copyright (C) 2007-2016 David Lutterkort
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * Author: David Lutterkort <dlutter@redhat.com>
34 static const char *const errcodes[] = {
37 "illegal string literal",
39 "string missing ending ' or \"",
45 "internal error", /* PATHX_EINTERNAL */
46 "type error", /* PATHX_ETYPE */
47 "undefined variable", /* PATHX_ENOVAR */
48 "garbage at end of path expression", /* PATHX_EEND */
49 "no match for path expression", /* PATHX_ENOMATCH */
50 "wrong number of arguments in function call", /* PATHX_EARITY */
51 "invalid regular expression", /* PATHX_EREGEXP */
52 "too many matches", /* PATHX_EMMATCH */
53 "wrong flag for regexp" /* PATHX_EREGEXPFLAG */
57 * Path expressions are strings that use a notation modelled on XPath.
61 T_NONE = 0, /* Not a type */
89 OP_RE_MATCH, /* '=~' */
90 OP_RE_NOMATCH, /* '!~' */
111 /* This array is indexed by enum axis */
112 static const char *const axis_names[] = {
116 "descendant-or-self",
124 /* The characters that can follow a name in a location expression (aka path)
125 * The parser will assume that name (path component) is finished when it
126 * encounters any of these characters, unless they are escaped by preceding
129 * See parse_name for the gory details
131 static const char name_follow[] = "][|/=()!,";
133 /* Doubly linked list of location steps. Besides the information from the
134 * path expression, also contains information to iterate over a node set,
135 * in particular, the context node CTX for the step, and the current node
136 * CUR within that context.
141 char *name; /* NULL to match any name */
142 struct pred *predicates;
145 /* Initialise the root nodeset with the first step */
146 static struct tree *step_root(struct step *step, struct tree *ctx,
147 struct tree *root_ctx);
148 /* Iteration over the nodes on a step, ignoring the predicates */
149 static struct tree *step_first(struct step *step, struct tree *ctx);
150 static struct tree *step_next(struct step *step, struct tree *ctx,
153 struct pathx_symtab {
154 struct pathx_symtab *next;
161 struct nodeset *nodeset;
179 typedef uint32_t value_ind_t;
184 struct nodeset *nodeset; /* T_NODESET */
185 int64_t number; /* T_NUMBER */
186 char *string; /* T_STRING */
187 bool boolval; /* T_BOOLEAN */
188 struct regexp *regexp; /* T_REGEXP */
196 struct { /* E_FILTER */
197 struct expr *primary;
198 struct pred *predicates;
199 struct locpath *locpath;
201 struct { /* E_BINARY */
206 value_ind_t value_ind; /* E_VALUE */
207 char *ident; /* E_VAR */
209 const struct func *func;
215 struct locpath_trace {
221 /* Internal state of the evaluator/parser */
223 pathx_errcode_t errcode;
228 const char *txt; /* Entire expression */
229 const char *pos; /* Current position within TXT during parsing */
231 struct tree *ctx; /* The current node */
235 struct tree *root_ctx; /* Root context for relative paths */
237 /* A table of all values. The table is dynamically reallocated, i.e.
238 * pointers to struct value should not be used across calls that
239 * might allocate new values
241 * value_pool[0] is always the boolean false, and value_pool[1]
242 * always the boolean true
244 struct value *value_pool;
245 value_ind_t value_pool_used;
246 value_ind_t value_pool_size;
247 /* Stack of values (as indices into value_pool), with bottom of
248 stack in values[0] */
252 /* Stack of expressions, with bottom of stack in exprs[0] */
256 /* Trace of a locpath evaluation, needed by pathx_expand_tree.
257 Generally NULL, unless a trace is needed.
259 struct locpath_trace *locpath_trace;
260 /* Symbol table for variable lookups */
261 struct pathx_symtab *symtab;
262 /* Error structure, used to communicate errors to struct augeas;
263 * we never own this structure, and therefore never free it */
267 /* We consider NULL and the empty string to be equal */
269 static inline int streqx(const char *s1, const char *s2) {
271 return (s2 == NULL || strlen(s2) == 0);
273 return strlen(s1) == 0;
274 return STREQ(s1, s2);
279 typedef void (*func_impl_t)(struct state *state, int nargs);
285 const enum type *arg_types;
289 static void func_last(struct state *state, int nargs);
290 static void func_position(struct state *state, int nargs);
291 static void func_count(struct state *state, int nargs);
292 static void func_label(struct state *state, int nargs);
293 static void func_regexp(struct state *state, int nargs);
294 static void func_regexp_flag(struct state *state, int nargs);
295 static void func_glob(struct state *state, int nargs);
296 static void func_int(struct state *state, int nargs);
297 static void func_not(struct state *state, int nargs);
299 static const enum type arg_types_nodeset[] = { T_NODESET };
300 static const enum type arg_types_string[] = { T_STRING };
301 static const enum type arg_types_bool[] = { T_BOOLEAN };
302 static const enum type arg_types_string_string[] = { T_STRING, T_STRING };
303 static const enum type arg_types_nodeset_string[] = { T_NODESET, T_STRING };
305 static const struct func builtin_funcs[] = {
306 { .name = "last", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
308 { .name = "position", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
309 .impl = func_position },
310 { .name = "label", .arity = 0, .type = T_STRING, .arg_types = NULL,
311 .impl = func_label },
312 { .name = "count", .arity = 1, .type = T_NUMBER,
313 .arg_types = arg_types_nodeset,
314 .impl = func_count },
315 { .name = "regexp", .arity = 1, .type = T_REGEXP,
316 .arg_types = arg_types_string,
317 .impl = func_regexp },
318 { .name = "regexp", .arity = 1, .type = T_REGEXP,
319 .arg_types = arg_types_nodeset,
320 .impl = func_regexp },
321 { .name = "regexp", .arity = 2, .type = T_REGEXP,
322 .arg_types = arg_types_string_string,
323 .impl = func_regexp_flag },
324 { .name = "regexp", .arity = 2, .type = T_REGEXP,
325 .arg_types = arg_types_nodeset_string,
326 .impl = func_regexp_flag },
327 { .name = "glob", .arity = 1, .type = T_REGEXP,
328 .arg_types = arg_types_string,
330 { .name = "glob", .arity = 1, .type = T_REGEXP,
331 .arg_types = arg_types_nodeset,
333 { .name = "int", .arity = 1, .type = T_NUMBER,
334 .arg_types = arg_types_string, .impl = func_int },
335 { .name = "int", .arity = 1, .type = T_NUMBER,
336 .arg_types = arg_types_nodeset, .impl = func_int },
337 { .name = "int", .arity = 1, .type = T_NUMBER,
338 .arg_types = arg_types_bool, .impl = func_int },
339 { .name = "not", .arity = 1, .type = T_BOOLEAN,
340 .arg_types = arg_types_bool, .impl = func_not }
343 #define RET_ON_ERROR \
344 if (state->errcode != PATHX_NOERROR) return
346 #define RET0_ON_ERROR \
347 if (state->errcode != PATHX_NOERROR) return 0
349 #define STATE_ERROR(state, err) \
351 state->errcode = err; \
352 state->file = __FILE__; \
353 state->line = __LINE__; \
356 #define HAS_ERROR(state) (state->errcode != PATHX_NOERROR)
358 #define STATE_ENOMEM STATE_ERROR(state, PATHX_ENOMEM)
361 * Free the various data structures
364 static void free_expr(struct expr *expr);
366 static void free_pred(struct pred *pred) {
370 for (int i=0; i < pred->nexpr; i++) {
371 free_expr(pred->exprs[i]);
377 static void free_step(struct step *step) {
378 while (step != NULL) {
379 struct step *del = step;
382 free_pred(del->predicates);
387 static void free_locpath(struct locpath *locpath) {
390 while (locpath->steps != NULL) {
391 struct step *step = locpath->steps;
392 locpath->steps = step->next;
394 free_pred(step->predicates);
400 static void free_expr(struct expr *expr) {
405 free_expr(expr->primary);
406 free_pred(expr->predicates);
407 free_locpath(expr->locpath);
410 free_expr(expr->left);
411 free_expr(expr->right);
419 for (int i=0; i < expr->func->arity; i++)
420 free_expr(expr->args[i]);
429 static void free_nodeset(struct nodeset *ns) {
436 /* Free all objects used by VALUE, but not VALUE itself */
437 static void release_value(struct value *v) {
443 free_nodeset(v->nodeset);
452 unref(v->regexp, regexp);
459 static void free_state(struct state *state) {
463 for(int i=0; i < state->exprs_used; i++)
464 free_expr(state->exprs[i]);
467 for(int i=0; i < state->value_pool_used; i++)
468 release_value(state->value_pool + i);
469 free(state->value_pool);
474 void free_pathx(struct pathx *pathx) {
477 free_state(pathx->state);
484 static struct nodeset *make_nodeset(struct state *state) {
485 struct nodeset *result;
486 if (ALLOC(result) < 0)
491 /* Add NODE to NS if it is not in NS yet. This relies on the flag
492 * NODE->ADDED and care must be taken that NS_CLEAR_ADDED is called on NS
493 * as soon as we are done adding nodes to it.
495 static void ns_add(struct nodeset *ns, struct tree *node,
496 struct state *state) {
499 if (ns->used >= ns->size) {
500 size_t size = 2 * ns->size;
501 if (size < 10) size = 10;
502 if (REALLOC_N(ns->nodes, size) < 0)
506 ns->nodes[ns->used] = node;
511 static void ns_clear_added(struct nodeset *ns) {
512 for (int i=0; i < ns->used; i++)
513 ns->nodes[i]->added = 0;
516 static struct nodeset *
517 clone_nodeset(struct nodeset *ns, struct state *state)
519 struct nodeset *clone;
520 if (ALLOC(clone) < 0) {
524 if (ALLOC_N(clone->nodes, ns->used) < 0) {
529 clone->used = ns->used;
530 clone->size = ns->used;
531 for (int i=0; i < ns->used; i++)
532 clone->nodes[i] = ns->nodes[i];
539 static value_ind_t make_value(enum type tag, struct state *state) {
540 assert(tag != T_BOOLEAN);
542 if (state->value_pool_used >= state->value_pool_size) {
543 value_ind_t new_size = 2*state->value_pool_size;
544 if (new_size <= state->value_pool_size) {
548 if (REALLOC_N(state->value_pool, new_size) < 0) {
552 state->value_pool_size = new_size;
554 state->value_pool[state->value_pool_used].tag = tag;
555 state->value_pool[state->value_pool_used].nodeset = NULL;
556 return state->value_pool_used++;
559 static value_ind_t clone_value(struct value *v, struct state *state) {
560 value_ind_t vind = make_value(v->tag, state);
562 struct value *clone = state->value_pool + vind;
566 clone->nodeset = clone_nodeset(v->nodeset, state);
569 clone->number = v->number;
572 clone->string = strdup(v->string);
573 if (clone->string == NULL) {
578 clone->boolval = v->boolval;
581 clone->regexp = ref(v->regexp);
589 static value_ind_t pop_value_ind(struct state *state) {
590 if (state->values_used > 0) {
591 state->values_used -= 1;
592 return state->values[state->values_used];
594 STATE_ERROR(state, PATHX_EINTERNAL);
600 static struct value *pop_value(struct state *state) {
601 value_ind_t vind = pop_value_ind(state);
602 if (HAS_ERROR(state))
604 return state->value_pool + vind;
607 static void push_value(value_ind_t vind, struct state *state) {
608 if (state->values_used >= state->values_size) {
609 size_t new_size = 2*state->values_size;
610 if (new_size == 0) new_size = 8;
611 if (REALLOC_N(state->values, new_size) < 0) {
615 state->values_size = new_size;
617 state->values[state->values_used++] = vind;
620 static void push_boolean_value(int b, struct state *state) {
621 push_value(b != 0, state);
625 static struct value *expr_value(struct expr *expr, struct state *state) {
626 return state->value_pool + expr->value_ind;
629 /*************************************************************************
631 ************************************************************************/
632 static void eval_expr(struct expr *expr, struct state *state);
635 #define ensure_arity(min, max) \
636 if (nargs < min || nargs > max) { \
637 STATE_ERROR(state, PATHX_EINTERNAL); \
641 static void func_last(struct state *state, int nargs) {
643 value_ind_t vind = make_value(T_NUMBER, state);
646 state->value_pool[vind].number = state->ctx_len;
647 push_value(vind, state);
650 static void func_position(struct state *state, int nargs) {
652 value_ind_t vind = make_value(T_NUMBER, state);
655 state->value_pool[vind].number = state->ctx_pos;
656 push_value(vind, state);
659 static void func_count(struct state *state, int nargs) {
661 value_ind_t vind = make_value(T_NUMBER, state);
664 struct value *ns = pop_value(state);
665 state->value_pool[vind].number = ns->nodeset->used;
666 push_value(vind, state);
669 static void func_label(struct state *state, int nargs) {
671 value_ind_t vind = make_value(T_STRING, state);
676 if (state->ctx->label)
677 s = strdup(state->ctx->label);
684 state->value_pool[vind].string = s;
685 push_value(vind, state);
688 static void func_int(struct state *state, int nargs) {
690 value_ind_t vind = make_value(T_NUMBER, state);
694 struct value *v = pop_value(state);
695 if (v->tag == T_BOOLEAN) {
698 const char *s = NULL;
699 if (v->tag == T_STRING) {
703 if (v->nodeset->used != 1) {
704 STATE_ERROR(state, PATHX_EMMATCH);
707 s = v->nodeset->nodes[0]->value;
711 r = xstrtoint64(s, 10, &i);
713 STATE_ERROR(state, PATHX_ENUMBER);
718 state->value_pool[vind].number = i;
719 push_value(vind, state);
722 static void func_not(struct state *state, int nargs) {
726 struct value *v = pop_value(state);
727 if (v->tag == T_BOOLEAN) {
728 push_boolean_value(! v->boolval, state);
732 static struct regexp *
733 nodeset_as_regexp(struct info *info, struct nodeset *ns, int glob, int nocase) {
734 struct regexp *result = NULL;
735 struct regexp **rx = NULL;
738 for (int i = 0; i < ns->used; i++) {
739 if (ns->nodes[i]->value != NULL)
744 /* If the nodeset is empty, make sure we produce a regexp
745 * that never matches anything */
746 result = make_regexp_unescape(info, "[^\001-\7ff]", nocase);
748 if (ALLOC_N(rx, ns->used) < 0)
750 for (int i=0; i < ns->used; i++) {
751 if (ns->nodes[i]->value == NULL)
755 rx[i] = make_regexp_from_glob(info, ns->nodes[i]->value);
757 rx[i] = make_regexp_unescape(info, ns->nodes[i]->value, 0);
761 result = regexp_union_n(info, ns->used, rx);
766 for (int i=0; i < ns->used; i++)
767 unref(rx[i], regexp);
773 static void func_regexp_or_glob(struct state *state, int glob, int nocase) {
774 value_ind_t vind = make_value(T_REGEXP, state);
779 struct value *v = pop_value(state);
780 struct regexp *rx = NULL;
782 if (v->tag == T_STRING) {
784 rx = make_regexp_from_glob(state->error->info, v->string);
786 rx = make_regexp_unescape(state->error->info, v->string, nocase);
787 } else if (v->tag == T_NODESET) {
788 rx = nodeset_as_regexp(state->error->info, v->nodeset, glob, nocase);
798 state->value_pool[vind].regexp = rx;
799 r = regexp_compile(rx);
802 regexp_check(rx, &msg);
803 state->errmsg = strdup(msg);
804 STATE_ERROR(state, PATHX_EREGEXP);
807 push_value(vind, state);
810 static void func_regexp(struct state *state, int nargs) {
812 func_regexp_or_glob(state, 0, 0);
815 static void func_regexp_flag(struct state *state, int nargs) {
818 struct value *f = pop_value(state);
820 if (STREQ("i", f->string))
823 STATE_ERROR(state, PATHX_EREGEXPFLAG);
825 func_regexp_or_glob(state, 0, nocase);
828 static void func_glob(struct state *state, int nargs) {
830 func_regexp_or_glob(state, 1, 0);
833 static bool coerce_to_bool(struct value *v) {
836 return v->nodeset->used > 0;
842 return v->number > 0;
845 return strlen(v->string) > 0;
855 static int calc_eq_nodeset_nodeset(struct nodeset *ns1, struct nodeset *ns2,
857 for (int i1=0; i1 < ns1->used; i1++) {
858 struct tree *t1 = ns1->nodes[i1];
859 for (int i2=0; i2 < ns2->used; i2++) {
860 struct tree *t2 = ns2->nodes[i2];
862 if (!streqx(t1->value, t2->value))
865 if (streqx(t1->value, t2->value))
873 static int calc_eq_nodeset_string(struct nodeset *ns, const char *s,
875 for (int i=0; i < ns->used; i++) {
876 struct tree *t = ns->nodes[i];
878 if (!streqx(t->value, s))
881 if (streqx(t->value, s))
888 static void eval_eq(struct state *state, int neq) {
889 struct value *r = pop_value(state);
890 struct value *l = pop_value(state);
893 if (l->tag == T_NODESET && r->tag == T_NODESET) {
894 res = calc_eq_nodeset_nodeset(l->nodeset, r->nodeset, neq);
895 } else if (l->tag == T_NODESET) {
896 res = calc_eq_nodeset_string(l->nodeset, r->string, neq);
897 } else if (r->tag == T_NODESET) {
898 res = calc_eq_nodeset_string(r->nodeset, l->string, neq);
899 } else if (l->tag == T_NUMBER && r->tag == T_NUMBER) {
901 res = (l->number != r->number);
903 res = (l->number == r->number);
905 assert(l->tag == T_STRING);
906 assert(r->tag == T_STRING);
907 res = streqx(l->string, r->string);
913 push_boolean_value(res, state);
916 static void eval_arith(struct state *state, enum binary_op op) {
917 value_ind_t vind = make_value(T_NUMBER, state);
918 struct value *r = pop_value(state);
919 struct value *l = pop_value(state);
922 assert(l->tag == T_NUMBER);
923 assert(r->tag == T_NUMBER);
928 res = l->number + r->number;
929 else if (op == OP_MINUS)
930 res = l->number - r->number;
931 else if (op == OP_STAR)
932 res = l->number * r->number;
936 state->value_pool[vind].number = res;
937 push_value(vind, state);
940 static void eval_rel(struct state *state, bool greater, bool equal) {
944 /* We always check l < r or l <= r */
946 l = pop_value(state);
947 r = pop_value(state);
949 r = pop_value(state);
950 l = pop_value(state);
952 if (l->tag == T_NUMBER) {
954 res = (l->number <= r->number);
956 res = (l->number < r->number);
957 } else if (l->tag == T_STRING) {
958 int cmp = strcmp(l->string, r->string);
967 push_boolean_value(res, state);
970 static void eval_and_or(struct state *state, enum binary_op op) {
971 struct value *r = pop_value(state);
972 struct value *l = pop_value(state);
973 bool left = coerce_to_bool(l);
974 bool right = coerce_to_bool(r);
978 push_boolean_value(left && right, state);
980 push_boolean_value(left || right, state);
983 static bool eval_re_match_str(struct state *state, struct regexp *rx,
990 r = regexp_match(rx, str, strlen(str), 0, NULL);
992 STATE_ERROR(state, PATHX_EINTERNAL);
993 } else if (r == -3) {
994 /* We should never get this far; func_regexp should catch
998 return r == strlen(str);
1001 static void eval_union(struct state *state) {
1002 value_ind_t vind = make_value(T_NODESET, state);
1003 struct value *r = pop_value(state);
1004 struct value *l = pop_value(state);
1005 struct nodeset *res = NULL;
1007 assert(l->tag == T_NODESET);
1008 assert(r->tag == T_NODESET);
1012 res = clone_nodeset(l->nodeset, state);
1014 for (int i=0; i < r->nodeset->used; i++) {
1015 ns_add(res, r->nodeset->nodes[i], state);
1016 if (HAS_ERROR(state))
1019 state->value_pool[vind].nodeset = res;
1020 push_value(vind, state);
1022 ns_clear_added(res);
1025 static void eval_concat_string(struct state *state) {
1026 value_ind_t vind = make_value(T_STRING, state);
1027 struct value *r = pop_value(state);
1028 struct value *l = pop_value(state);
1033 if (ALLOC_N(res, strlen(l->string) + strlen(r->string) + 1) < 0) {
1037 strcpy(res, l->string);
1038 strcat(res, r->string);
1039 state->value_pool[vind].string = res;
1040 push_value(vind, state);
1043 static void eval_concat_regexp(struct state *state) {
1044 value_ind_t vind = make_value(T_REGEXP, state);
1045 struct value *r = pop_value(state);
1046 struct value *l = pop_value(state);
1047 struct regexp *rx = NULL;
1051 rx = regexp_concat(state->error->info, l->regexp, r->regexp);
1057 state->value_pool[vind].regexp = rx;
1058 push_value(vind, state);
1061 static void eval_re_match(struct state *state, enum binary_op op) {
1062 struct value *rx = pop_value(state);
1063 struct value *v = pop_value(state);
1065 bool result = false;
1067 if (v->tag == T_STRING) {
1068 result = eval_re_match_str(state, rx->regexp, v->string);
1070 } else if (v->tag == T_NODESET) {
1071 for (int i=0; i < v->nodeset->used && result == false; i++) {
1072 struct tree *t = v->nodeset->nodes[i];
1073 result = eval_re_match_str(state, rx->regexp, t->value);
1077 if (op == OP_RE_NOMATCH)
1079 push_boolean_value(result, state);
1082 static void eval_binary(struct expr *expr, struct state *state) {
1083 eval_expr(expr->left, state);
1084 eval_expr(expr->right, state);
1095 eval_rel(state, false, false);
1098 eval_rel(state, false, true);
1101 eval_rel(state, true, false);
1104 eval_rel(state, true, true);
1107 if (expr->type == T_NUMBER)
1108 eval_arith(state, expr->op);
1109 else if (expr->type == T_STRING)
1110 eval_concat_string(state);
1111 else if (expr->type == T_REGEXP)
1112 eval_concat_regexp(state);
1116 eval_arith(state, expr->op);
1120 eval_and_or(state, expr->op);
1127 eval_re_match(state, expr->op);
1134 static void eval_app(struct expr *expr, struct state *state) {
1135 assert(expr->tag == E_APP);
1137 for (int i=0; i < expr->func->arity; i++) {
1138 eval_expr(expr->args[i], state);
1141 expr->func->impl(state, expr->func->arity);
1144 static bool eval_pred(struct expr *expr, struct state *state) {
1145 eval_expr(expr, state);
1148 struct value *v = pop_value(state);
1153 return (state->ctx_pos == v->number);
1155 return v->nodeset->used > 0;
1157 return streqv(state->ctx->value, v->string);
1164 /* Remove COUNT successive entries from NS. The first entry to remove is at
1166 static void ns_remove(struct nodeset *ns, int ind, int count) {
1169 memmove(ns->nodes + ind, ns->nodes + ind+count,
1170 sizeof(ns->nodes[0]) * (ns->used - (ind+count)));
1175 * Remove all nodes from NS for which one of PRED is false
1177 static void ns_filter(struct nodeset *ns, struct pred *predicates,
1178 struct state *state) {
1179 if (predicates == NULL)
1182 struct tree *old_ctx = state->ctx;
1183 uint old_ctx_len = state->ctx_len;
1184 uint old_ctx_pos = state->ctx_pos;
1186 for (int p=0; p < predicates->nexpr; p++) {
1187 int first_bad = -1; /* The index of the first non-matching node */
1188 state->ctx_len = ns->used;
1190 for (int i=0; i < ns->used; state->ctx_pos++) {
1191 state->ctx = ns->nodes[i];
1192 bool match = eval_pred(predicates->exprs[p], state);
1194 /* We remove non-matching nodes from NS in batches; this logic
1195 * makes sure that we only call ns_remove at the end of a run
1196 * of non-matching nodes
1199 if (first_bad >= 0) {
1200 ns_remove(ns, first_bad, i - first_bad);
1207 if (first_bad == -1)
1212 if (first_bad >= 0) {
1213 ns_remove(ns, first_bad, ns->used - first_bad);
1217 state->ctx = old_ctx;
1218 state->ctx_pos = old_ctx_pos;
1219 state->ctx_len = old_ctx_len;
1222 /* Return true if PRED is solely the predicate '[n]' as in 'foo[17]' */
1223 static bool position_pred(struct pred *pred) {
1224 return pred != NULL &&
1226 pred->exprs[0]->tag == E_VALUE &&
1227 pred->exprs[0]->type == T_NUMBER;
1230 /* Return the tree node at the position implied by STEP->PREDICATES. It is
1231 assumed and required that STEP->PREDICATES is actually a
1234 This method hand-optimizes the important case of a path expression like
1237 static struct tree *position_filter(struct nodeset *ns,
1239 struct state *state) {
1240 int value_ind = step->predicates->exprs[0]->value_ind;
1241 int number = state->value_pool[value_ind].number;
1244 for (int i=0; i < ns->used; i++) {
1245 for (struct tree *node = step_first(step, ns->nodes[i]);
1247 node = step_next(step, ns->nodes[i], node), pos++) {
1256 /* Return an array of nodesets, one for each step in the locpath.
1258 * On return, (*NS)[0] will contain state->ctx, and (*NS)[*MAXNS] will
1259 * contain the nodes that matched the entire locpath
1261 static void ns_from_locpath(struct locpath *lp, uint *maxns,
1262 struct nodeset ***ns,
1263 const struct nodeset *root,
1264 struct state *state) {
1265 struct tree *old_ctx = state->ctx;
1268 ensure(lp != NULL, state);
1271 list_for_each(step, lp->steps)
1273 if (ALLOC_N(*ns, *maxns+1) < 0) {
1274 STATE_ERROR(state, PATHX_ENOMEM);
1277 for (int i=0; i <= *maxns; i++) {
1278 (*ns)[i] = make_nodeset(state);
1279 if (HAS_ERROR(state))
1284 struct step *first_step = NULL;
1285 first_step = lp->steps;
1287 struct tree *root_tree;
1288 root_tree = step_root(first_step, state->ctx, state->root_ctx);
1289 ns_add((*ns)[0], root_tree, state);
1290 ns_clear_added((*ns)[0]);
1292 for (int i=0; i < root->used; i++)
1293 ns_add((*ns)[0], root->nodes[i], state);
1294 ns_clear_added((*ns)[0]);
1297 if (HAS_ERROR(state))
1301 list_for_each(step, lp->steps) {
1302 struct nodeset *work = (*ns)[cur_ns];
1303 struct nodeset *next = (*ns)[cur_ns + 1];
1304 if (position_pred(step->predicates)) {
1305 struct tree *node = position_filter(work, step, state);
1307 ns_add(next, node, state);
1308 ns_clear_added(next);
1311 for (int i=0; i < work->used; i++) {
1312 for (struct tree *node = step_first(step, work->nodes[i]);
1314 node = step_next(step, work->nodes[i], node)) {
1315 ns_add(next, node, state);
1318 ns_clear_added(next);
1319 ns_filter(next, step->predicates, state);
1320 if (HAS_ERROR(state))
1326 state->ctx = old_ctx;
1330 for (int i=0; i <= *maxns; i++)
1331 free_nodeset((*ns)[i]);
1334 state->ctx = old_ctx;
1338 static void eval_filter(struct expr *expr, struct state *state) {
1339 struct locpath *lp = expr->locpath;
1340 struct nodeset **ns = NULL;
1341 struct locpath_trace *lpt = state->locpath_trace;
1344 state->locpath_trace = NULL;
1345 if (expr->primary == NULL) {
1346 ns_from_locpath(lp, &maxns, &ns, NULL, state);
1348 eval_expr(expr->primary, state);
1350 value_ind_t primary_ind = pop_value_ind(state);
1351 struct value *primary = state->value_pool + primary_ind;
1352 assert(primary->tag == T_NODESET);
1353 ns_filter(primary->nodeset, expr->predicates, state);
1354 /* Evaluating predicates might have reallocated the value_pool */
1355 primary = state->value_pool + primary_ind;
1356 ns_from_locpath(lp, &maxns, &ns, primary->nodeset, state);
1360 value_ind_t vind = make_value(T_NODESET, state);
1362 state->value_pool[vind].nodeset = ns[maxns];
1363 push_value(vind, state);
1366 assert(lpt->ns == NULL);
1367 assert(lpt->lp == NULL);
1371 state->locpath_trace = lpt;
1373 for (int i=0; i < maxns; i++)
1374 free_nodeset(ns[i]);
1379 static struct value *lookup_var(const char *ident,
1380 const struct pathx_symtab *symtab) {
1381 list_for_each(tab, symtab) {
1382 if (STREQ(ident, tab->name))
1388 static void eval_var(struct expr *expr, struct state *state) {
1389 struct value *v = lookup_var(expr->ident, state->symtab);
1390 value_ind_t vind = clone_value(v, state);
1392 push_value(vind, state);
1395 static void eval_expr(struct expr *expr, struct state *state) {
1397 switch (expr->tag) {
1399 eval_filter(expr, state);
1402 eval_binary(expr, state);
1405 push_value(expr->value_ind, state);
1408 eval_var(expr, state);
1411 eval_app(expr, state);
1418 /*************************************************************************
1420 *************************************************************************/
1422 static void check_expr(struct expr *expr, struct state *state);
1424 /* Typecheck a list of predicates. A predicate is a function of
1425 * one of the following types:
1427 * T_NODESET -> T_BOOLEAN
1428 * T_NUMBER -> T_BOOLEAN (position test)
1429 * T_BOOLEAN -> T_BOOLEAN
1431 static void check_preds(struct pred *pred, struct state *state) {
1434 for (int i=0; i < pred->nexpr; i++) {
1435 struct expr *e = pred->exprs[i];
1436 check_expr(e, state);
1438 if (e->type != T_NODESET && e->type != T_NUMBER &&
1439 e->type != T_BOOLEAN && e->type != T_STRING) {
1440 STATE_ERROR(state, PATHX_ETYPE);
1446 static void check_filter(struct expr *expr, struct state *state) {
1447 assert(expr->tag == E_FILTER);
1448 struct locpath *locpath = expr->locpath;
1450 if (expr->primary != NULL) {
1451 check_expr(expr->primary, state);
1452 if (expr->primary->type != T_NODESET) {
1453 STATE_ERROR(state, PATHX_ETYPE);
1456 check_preds(expr->predicates, state);
1459 list_for_each(s, locpath->steps) {
1460 check_preds(s->predicates, state);
1463 expr->type = T_NODESET;
1466 static void check_app(struct expr *expr, struct state *state) {
1467 assert(expr->tag == E_APP);
1469 for (int i=0; i < expr->func->arity; i++) {
1470 check_expr(expr->args[i], state);
1475 for (f=0; f < ARRAY_CARDINALITY(builtin_funcs); f++) {
1476 const struct func *fn = builtin_funcs + f;
1477 if (STRNEQ(expr->func->name, fn->name))
1479 if (expr->func->arity != fn->arity)
1483 for (int i=0; i < expr->func->arity; i++) {
1484 if (expr->args[i]->type != fn->arg_types[i]) {
1493 if (f < ARRAY_CARDINALITY(builtin_funcs)) {
1494 expr->func = builtin_funcs + f;
1495 expr->type = expr->func->type;
1497 STATE_ERROR(state, PATHX_ETYPE);
1501 /* Check the binary operators. Type rules:
1503 * '=', '!=' : T_NODESET -> T_NODESET -> T_BOOLEAN
1504 * T_STRING -> T_NODESET -> T_BOOLEAN
1505 * T_NODESET -> T_STRING -> T_BOOLEAN
1506 * T_NUMBER -> T_NUMBER -> T_BOOLEAN
1509 * '<', '<=' : T_NUMBER -> T_NUMBER -> T_BOOLEAN
1510 * T_STRING -> T_STRING -> T_BOOLEAN
1511 * '+' : T_NUMBER -> T_NUMBER -> T_NUMBER
1512 * T_STRING -> T_STRING -> T_STRING
1513 * T_REGEXP -> T_REGEXP -> T_REGEXP
1514 * '+', '-', '*': T_NUMBER -> T_NUMBER -> T_NUMBER
1516 * 'and', 'or': T_BOOLEAN -> T_BOOLEAN -> T_BOOLEAN
1517 * '=~', '!~' : T_STRING -> T_REGEXP -> T_BOOLEAN
1518 * T_NODESET -> T_REGEXP -> T_BOOLEAN
1520 * '|' : T_NODESET -> T_NODESET -> T_NODESET
1522 * Any type can be coerced to T_BOOLEAN (see coerce_to_bool)
1524 static void check_binary(struct expr *expr, struct state *state) {
1525 check_expr(expr->left, state);
1526 check_expr(expr->right, state);
1529 enum type l = expr->left->type;
1530 enum type r = expr->right->type;
1537 ok = ((l == T_NODESET || l == T_STRING)
1538 && (r == T_NODESET || r == T_STRING))
1539 || (l == T_NUMBER && r == T_NUMBER);;
1546 ok = (l == T_NUMBER && r == T_NUMBER)
1547 || (l == T_STRING && r == T_STRING);
1551 ok = (l == r && (l == T_NUMBER || l == T_STRING || l == T_REGEXP));
1556 ok = (l == T_NUMBER && r == T_NUMBER);
1560 ok = (l == T_NODESET && r == T_NODESET);
1570 ok = ((l == T_STRING || l == T_NODESET) && r == T_REGEXP);
1577 STATE_ERROR(state, PATHX_ETYPE);
1583 static void check_var(struct expr *expr, struct state *state) {
1584 struct value *v = lookup_var(expr->ident, state->symtab);
1586 STATE_ERROR(state, PATHX_ENOVAR);
1589 expr->type = v->tag;
1592 /* Typecheck an expression */
1593 static void check_expr(struct expr *expr, struct state *state) {
1597 check_filter(expr, state);
1600 check_binary(expr, state);
1603 expr->type = expr_value(expr, state)->tag;
1606 check_var(expr, state);
1609 check_app(expr, state);
1617 * Utility functions for the parser
1620 static void skipws(struct state *state) {
1621 while (isspace(*state->pos)) state->pos += 1;
1624 static int match(struct state *state, char m) {
1627 if (*state->pos == '\0')
1629 if (*state->pos == m) {
1636 static int peek(struct state *state, const char *chars) {
1637 return strchr(chars, *state->pos) != NULL;
1640 /* Return 1 if STATE->POS starts with TOKEN, followed by optional
1641 * whitespace, followed by FOLLOW. In that case, STATE->POS is set to the
1642 * first character after FOLLOW. Return 0 otherwise and leave STATE->POS
1645 static int looking_at(struct state *state, const char *token,
1646 const char *follow) {
1647 if (STREQLEN(state->pos, token, strlen(token))) {
1648 const char *p = state->pos + strlen(token);
1649 while (isspace(*p)) p++;
1650 if (STREQLEN(p, follow, strlen(follow))) {
1651 state->pos = p + strlen(follow);
1658 /*************************************************************************
1660 *************************************************************************/
1662 static void parse_expr(struct state *state);
1664 static struct expr* pop_expr(struct state *state) {
1665 if (state->exprs_used > 0) {
1666 state->exprs_used -= 1;
1667 return state->exprs[state->exprs_used];
1669 STATE_ERROR(state, PATHX_EINTERNAL);
1675 static void push_expr(struct expr *expr, struct state *state) {
1676 if (state->exprs_used >= state->exprs_size) {
1677 size_t new_size = 2*state->exprs_size;
1678 if (new_size == 0) new_size = 8;
1679 if (REALLOC_N(state->exprs, new_size) < 0) {
1683 state->exprs_size = new_size;
1685 state->exprs[state->exprs_used++] = expr;
1688 static void push_new_binary_op(enum binary_op op, struct state *state) {
1689 struct expr *expr = NULL;
1690 if (ALLOC(expr) < 0) {
1695 expr->tag = E_BINARY;
1697 expr->right = pop_expr(state);
1698 expr->left = pop_expr(state);
1699 push_expr(expr, state);
1702 int pathx_escape_name(const char *in, char **out) {
1704 int num_to_escape = 0;
1709 for (p = in; *p; p++) {
1710 if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1714 if (num_to_escape == 0)
1717 if (ALLOC_N(*out, strlen(in) + num_to_escape + 1) < 0)
1720 for (p = in, s = *out; *p; p++) {
1721 if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1729 /* Return true if POS is preceded by an odd number of backslashes, i.e., if
1730 * POS is escaped. Stop the search when we get to START */
1731 static bool backslash_escaped(const char *pos, const char *start) {
1733 while (pos-- > start && *pos == '\\') {
1740 * NameNoWS ::= [^][|/\= \t\n] | \\.
1741 * NameWS ::= [^][|/\=] | \\.
1742 * Name ::= NameNoWS NameWS* NameNoWS | NameNoWS
1744 static char *parse_name(struct state *state) {
1745 const char *s = state->pos;
1748 /* Advance state->pos until it points to the first character that is
1749 * not part of a name. */
1750 while (*state->pos != '\0' && strchr(name_follow, *state->pos) == NULL) {
1751 /* Since we allow spaces in names, we need to avoid gobbling up
1752 * stuff that is in follow(Name), e.g. 'or' so that things like
1753 * [name1 or name2] still work. In other words, we'll parse 'x frob
1754 * y' as one name, but for 'x or y', we consider 'x' a name in its
1756 if (STREQLEN(state->pos, " or ", strlen(" or ")) ||
1757 STREQLEN(state->pos, " and ", strlen(" and ")))
1760 if (*state->pos == '\\') {
1762 if (*state->pos == '\0') {
1763 STATE_ERROR(state, PATHX_ENAME);
1770 /* Strip trailing white space. Make sure we respect escaped whitespace
1771 * and don't strip it as in "x\\ " */
1772 if (state->pos > s) {
1774 while (isspace(*state->pos) && state->pos > s
1775 && !backslash_escaped(state->pos, s))
1780 if (state->pos == s) {
1781 STATE_ERROR(state, PATHX_ENAME);
1785 result = strndup(s, state->pos - s);
1786 if (result == NULL) {
1792 for (char *t = result; *t != '\0'; t++, p++) {
1803 * Predicate ::= "[" Expr "]" *
1805 static struct pred *parse_predicates(struct state *state) {
1806 struct pred *pred = NULL;
1809 while (match(state, L_BRACK)) {
1814 if (! match(state, R_BRACK)) {
1815 STATE_ERROR(state, PATHX_EPRED);
1824 if (ALLOC(pred) < 0) {
1828 pred->nexpr = nexpr;
1830 if (ALLOC_N(pred->exprs, nexpr) < 0) {
1836 for (int i = nexpr - 1; i >= 0; i--)
1837 pred->exprs[i] = pop_expr(state);
1843 * Step ::= AxisSpecifier NameTest Predicate* | '.' | '..'
1844 * AxisSpecifier ::= AxisName '::' | <epsilon>
1845 * AxisName ::= 'ancestor'
1846 * | 'ancestor-or-self'
1849 * | 'descendant-or-self'
1854 static struct step *parse_step(struct state *state) {
1856 int explicit_axis = 0, allow_predicates = 1;
1858 if (ALLOC(step) < 0) {
1864 for (int i = 0; i < ARRAY_CARDINALITY(axis_names); i++) {
1865 if (looking_at(state, axis_names[i], "::")) {
1872 if (! match(state, '*')) {
1873 step->name = parse_name(state);
1874 if (HAS_ERROR(state))
1876 if (! explicit_axis) {
1877 if (STREQ(step->name, ".") || STREQ(step->name, "..")) {
1878 step->axis = STREQ(step->name, ".") ? SELF : PARENT;
1880 allow_predicates = 0;
1885 if (allow_predicates) {
1886 step->predicates = parse_predicates(state);
1887 if (HAS_ERROR(state))
1898 static struct step *make_step(enum axis axis, struct state *state) {
1899 struct step *result = NULL;
1901 if (ALLOC(result) < 0) {
1905 result->axis = axis;
1910 * RelativeLocationPath ::= Step
1911 * | RelativeLocationPath '/' Step
1912 * | AbbreviatedRelativeLocationPath
1913 * AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step
1915 * The above is the same as
1916 * RelativeLocationPath ::= Step ('/' Step | '//' Step)*
1918 static struct locpath *
1919 parse_relative_location_path(struct state *state) {
1920 struct step *step = NULL;
1921 struct locpath *locpath = NULL;
1923 step = parse_step(state);
1924 if (HAS_ERROR(state))
1927 if (ALLOC(locpath) < 0) {
1931 list_append(locpath->steps, step);
1934 while (match(state, '/')) {
1935 if (*state->pos == '/') {
1937 step = make_step(DESCENDANT_OR_SELF, state);
1942 list_append(locpath->steps, step);
1944 step = parse_step(state);
1945 if (HAS_ERROR(state))
1947 list_append(locpath->steps, step);
1954 free_locpath(locpath);
1959 * LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
1960 * AbsoluteLocationPath ::= '/' RelativeLocationPath?
1961 * | AbbreviatedAbsoluteLocationPath
1962 * AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath
1965 static void parse_location_path(struct state *state) {
1966 struct expr *expr = NULL;
1967 struct locpath *locpath = NULL;
1969 if (match(state, '/')) {
1970 if (*state->pos == '/') {
1972 locpath = parse_relative_location_path(state);
1973 if (HAS_ERROR(state))
1975 struct step *step = make_step(DESCENDANT_OR_SELF, state);
1976 if (HAS_ERROR(state))
1978 list_cons(locpath->steps, step);
1980 if (*state->pos != '\0') {
1981 locpath = parse_relative_location_path(state);
1983 if (ALLOC(locpath) < 0)
1986 struct step *step = make_step(ROOT, state);
1987 if (HAS_ERROR(state)) {
1991 list_cons(locpath->steps, step);
1994 locpath = parse_relative_location_path(state);
1997 if (ALLOC(expr) < 0)
1999 expr->tag = E_FILTER;
2000 expr->locpath = locpath;
2001 push_expr(expr, state);
2008 free_locpath(locpath);
2013 * Number ::= /[0-9]+/
2015 static void parse_number(struct state *state) {
2016 struct expr *expr = NULL;
2021 val = strtoul(state->pos, &end, 10);
2022 if (errno || end == state->pos || (int) val != val) {
2023 STATE_ERROR(state, PATHX_ENUMBER);
2029 if (ALLOC(expr) < 0)
2031 expr->tag = E_VALUE;
2032 expr->value_ind = make_value(T_NUMBER, state);
2033 if (HAS_ERROR(state))
2035 expr_value(expr, state)->number = val;
2037 push_expr(expr, state);
2048 * Literal ::= '"' /[^"]* / '"' | "'" /[^']* / "'"
2050 static void parse_literal(struct state *state) {
2053 struct expr *expr = NULL;
2055 if (*state->pos == '"')
2057 else if (*state->pos == '\'')
2060 STATE_ERROR(state, PATHX_ESTRING);
2066 while (*state->pos != '\0' && *state->pos != delim) state->pos += 1;
2068 if (*state->pos != delim) {
2069 STATE_ERROR(state, PATHX_EDELIM);
2074 if (ALLOC(expr) < 0)
2076 expr->tag = E_VALUE;
2077 expr->value_ind = make_value(T_STRING, state);
2078 if (HAS_ERROR(state))
2080 expr_value(expr, state)->string = strndup(s, state->pos - s - 1);
2081 if (expr_value(expr, state)->string == NULL)
2084 push_expr(expr, state);
2095 * FunctionCall ::= Name '(' ( Expr ( ',' Expr )* )? ')'
2097 static void parse_function_call(struct state *state) {
2098 const struct func *func = NULL;
2099 struct expr *expr = NULL;
2100 int nargs = 0, find = 0;
2102 for (; find < ARRAY_CARDINALITY(builtin_funcs); find++) {
2103 if (looking_at(state, builtin_funcs[find].name, "(")) {
2104 func = builtin_funcs + find;
2109 STATE_ERROR(state, PATHX_ENAME);
2113 if (! match(state, ')')) {
2118 } while (match(state, ','));
2120 if (! match(state, ')')) {
2121 STATE_ERROR(state, PATHX_EPAREN);
2126 int found = 0; /* Whether there is a builtin matching in name and arity */
2127 for (int i=find; i < ARRAY_CARDINALITY(builtin_funcs); i++) {
2128 if (STRNEQ(func->name, builtin_funcs[i].name))
2130 if (builtin_funcs[i].arity == nargs) {
2131 func = builtin_funcs + i;
2138 STATE_ERROR(state, PATHX_EARITY);
2142 if (ALLOC(expr) < 0) {
2147 if (ALLOC_N(expr->args, nargs) < 0) {
2153 for (int i = nargs - 1; i >= 0; i--)
2154 expr->args[i] = pop_expr(state);
2156 push_expr(expr, state);
2160 * VariableReference ::= '$' /[a-zA-Z_][a-zA-Z0-9_]* /
2162 * The '$' is consumed by parse_primary_expr
2164 static void parse_var(struct state *state) {
2165 const char *id = state->pos;
2166 struct expr *expr = NULL;
2168 if (!isalpha(*id) && *id != '_') {
2169 STATE_ERROR(state, PATHX_ENAME);
2173 while (isalpha(*id) || isdigit(*id) || *id == '_')
2176 if (ALLOC(expr) < 0)
2179 expr->ident = strndup(state->pos, id - state->pos);
2180 if (expr->ident == NULL)
2183 push_expr(expr, state);
2193 * PrimaryExpr ::= Literal
2196 * | VariableReference
2200 static void parse_primary_expr(struct state *state) {
2201 if (peek(state, "'\"")) {
2202 parse_literal(state);
2203 } else if (peek(state, "0123456789")) {
2204 parse_number(state);
2205 } else if (match(state, '(')) {
2208 if (! match(state, ')')) {
2209 STATE_ERROR(state, PATHX_EPAREN);
2212 } else if (match(state, '$')) {
2215 parse_function_call(state);
2219 static int looking_at_primary_expr(struct state *state) {
2220 const char *s = state->pos;
2221 /* Is it a Number, Literal or VariableReference ? */
2222 if (peek(state, "$'\"0123456789"))
2225 /* Or maybe a function call, i.e. a word followed by a '(' ?
2226 * Note that our function names are only [a-zA-Z]+
2228 while (*s != '\0' && isalpha(*s)) s++;
2229 while (*s != '\0' && isspace(*s)) s++;
2234 * PathExpr ::= LocationPath
2236 * | FilterExpr '/' RelativeLocationPath
2237 * | FilterExpr '//' RelativeLocationPath
2239 * FilterExpr ::= PrimaryExpr Predicate
2241 * The grammar is ambiguous here: the expression '42' can either be the
2242 * number 42 (a PrimaryExpr) or the RelativeLocationPath 'child::42'. The
2243 * reason for this ambiguity is that we allow node names like '42' in the
2244 * tree; rather than forbid them, we resolve the ambiguity by always
2245 * parsing '42' as a number, and requiring that the user write the
2246 * RelativeLocationPath in a different form, e.g. 'child::42' or './42'.
2248 static void parse_path_expr(struct state *state) {
2249 struct expr *expr = NULL;
2250 struct pred *predicates = NULL;
2251 struct locpath *locpath = NULL;
2253 if (looking_at_primary_expr(state)) {
2254 parse_primary_expr(state);
2256 predicates = parse_predicates(state);
2258 if (match(state, '/')) {
2259 if (match(state, '/')) {
2260 locpath = parse_relative_location_path(state);
2261 if (HAS_ERROR(state))
2264 struct step *step = make_step(DESCENDANT_OR_SELF, state);
2265 if (HAS_ERROR(state))
2267 list_cons(locpath->steps, step);
2269 if (*state->pos == '\0') {
2270 STATE_ERROR(state, PATHX_EEND);
2273 locpath = parse_relative_location_path(state);
2276 /* A PathExpr without predicates and locpath is
2277 * just a PrimaryExpr
2279 if (predicates == NULL && locpath == NULL)
2281 /* To make evaluation easier, we parse something like
2282 * $var[pred] as $var[pred]/.
2284 if (locpath == NULL) {
2285 if (ALLOC(locpath) < 0)
2287 if (ALLOC(locpath->steps) < 0)
2289 locpath->steps->axis = SELF;
2291 if (ALLOC(expr) < 0)
2293 expr->tag = E_FILTER;
2294 expr->predicates = predicates;
2295 expr->primary = pop_expr(state);
2296 expr->locpath = locpath;
2297 push_expr(expr, state);
2299 parse_location_path(state);
2304 free_pred(predicates);
2305 free_locpath(locpath);
2310 * UnionExpr ::= PathExpr ('|' PathExpr)*
2312 static void parse_union_expr(struct state *state) {
2313 parse_path_expr(state);
2315 while (match(state, '|')) {
2316 parse_path_expr(state);
2318 push_new_binary_op(OP_UNION, state);
2323 * MultiplicativeExpr ::= UnionExpr ('*' UnionExpr)*
2325 static void parse_multiplicative_expr(struct state *state) {
2326 parse_union_expr(state);
2328 while (match(state, '*')) {
2329 parse_union_expr(state);
2331 push_new_binary_op(OP_STAR, state);
2336 * AdditiveExpr ::= MultiplicativeExpr (AdditiveOp MultiplicativeExpr)*
2337 * AdditiveOp ::= '+' | '-'
2339 static void parse_additive_expr(struct state *state) {
2340 parse_multiplicative_expr(state);
2342 while (*state->pos == '+' || *state->pos == '-') {
2343 enum binary_op op = (*state->pos == '+') ? OP_PLUS : OP_MINUS;
2346 parse_multiplicative_expr(state);
2348 push_new_binary_op(op, state);
2353 * RelationalExpr ::= AdditiveExpr (RelationalOp AdditiveExpr)?
2354 * RelationalOp ::= ">" | "<" | ">=" | "<="
2356 static void parse_relational_expr(struct state *state) {
2357 parse_additive_expr(state);
2359 if (*state->pos == '<' || *state->pos == '>') {
2360 enum binary_op op = (*state->pos == '<') ? OP_LT : OP_GT;
2362 if (*state->pos == '=') {
2363 op = (op == OP_LT) ? OP_LE : OP_GE;
2367 parse_additive_expr(state);
2369 push_new_binary_op(op, state);
2374 * EqualityExpr ::= RelationalExpr (EqualityOp RelationalExpr)? | ReMatchExpr
2375 * EqualityOp ::= "=" | "!="
2376 * ReMatchExpr ::= RelationalExpr MatchOp RelationalExpr
2377 * MatchOp ::= "=~" | "!~"
2379 static void parse_equality_expr(struct state *state) {
2380 parse_relational_expr(state);
2382 if ((*state->pos == '=' || *state->pos == '!') && state->pos[1] == '~') {
2383 enum binary_op op = (*state->pos == '=') ? OP_RE_MATCH : OP_RE_NOMATCH;
2386 parse_relational_expr(state);
2388 push_new_binary_op(op, state);
2389 } else if (*state->pos == '=' ||
2390 (*state->pos == '!' && state->pos[1] == '=')) {
2391 enum binary_op op = (*state->pos == '=') ? OP_EQ : OP_NEQ;
2392 state->pos += (op == OP_EQ) ? 1 : 2;
2394 parse_relational_expr(state);
2396 push_new_binary_op(op, state);
2401 * AndExpr ::= EqualityExpr ('and' EqualityExpr)*
2403 static void parse_and_expr(struct state *state) {
2404 parse_equality_expr(state);
2406 while (*state->pos == 'a' && state->pos[1] == 'n'
2407 && state->pos[2] == 'd') {
2410 parse_equality_expr(state);
2412 push_new_binary_op(OP_AND, state);
2417 * OrExpr ::= AndExpr ('or' AndExpr)*
2419 static void parse_or_expr(struct state *state) {
2420 parse_and_expr(state);
2422 while (*state->pos == 'o' && state->pos[1] == 'r') {
2425 parse_and_expr(state);
2427 push_new_binary_op(OP_OR, state);
2434 static void parse_expr(struct state *state) {
2436 parse_or_expr(state);
2439 static void store_error(struct pathx *pathx) {
2440 const char *pathx_msg = NULL;
2441 const char *path = pathx->state->txt;
2442 const pathx_errcode_t errcode = pathx->state->errcode;
2443 struct error *err = pathx->state->error;
2445 char *pos_str = pathx->state->errmsg;
2446 pathx->state->errmsg = NULL;
2448 if (err == NULL || errcode == PATHX_NOERROR || err->code != AUG_NOERROR)
2453 err->code = AUG_ENOMEM;
2456 err->code = AUG_EMMATCH;
2458 case PATHX_ENOMATCH:
2459 err->code = AUG_ENOMATCH;
2462 err->code = AUG_EPATHX;
2466 /* We only need details for pathx syntax errors */
2467 if (err->code != AUG_EPATHX)
2471 pathx_msg = pathx_error(pathx, NULL, &pos);
2473 bool has_msg = pos_str != NULL;
2474 int pos_str_len = pos_str == NULL ? 0 : strlen(pos_str);
2475 if (REALLOC_N(pos_str, pos_str_len + strlen(path) + 8) >= 0) {
2477 strcat(pos_str, " in ");
2478 strncat(pos_str, path, pos);
2480 /* initialize pos_str explicitly, path might be "" */
2482 strncat(pos_str, path, pos);
2484 strcat(pos_str, "|=|");
2485 strcat(pos_str, path + pos);
2488 err->minor = errcode;
2489 err->details = pos_str;
2491 err->minor_details = pathx_msg;
2494 int pathx_parse(const struct tree *tree,
2498 struct pathx_symtab *symtab,
2499 struct tree *root_ctx,
2500 struct pathx **pathx) {
2501 struct state *state = NULL;
2505 if (ALLOC(*pathx) < 0)
2508 (*pathx)->origin = (struct tree *) tree;
2511 if (ALLOC((*pathx)->state) < 0)
2513 state = (*pathx)->state;
2515 state->errcode = PATHX_NOERROR;
2516 state->errmsg = NULL;
2519 state->symtab = symtab;
2520 state->root_ctx = root_ctx;
2523 if (ALLOC_N(state->value_pool, 8) < 0) {
2527 state->value_pool_size = 8;
2528 state->value_pool[0].tag = T_BOOLEAN;
2529 state->value_pool[0].boolval = 0;
2530 state->value_pool[1].tag = T_BOOLEAN;
2531 state->value_pool[1].boolval = 1;
2532 state->value_pool_used = 2;
2536 if (HAS_ERROR(state))
2538 if (state->pos != state->txt + strlen(state->txt)) {
2539 STATE_ERROR(state, PATHX_EEND);
2543 if (state->exprs_used != 1) {
2544 STATE_ERROR(state, PATHX_EINTERNAL);
2549 check_expr(state->exprs[0], state);
2550 if (HAS_ERROR(state))
2553 if (need_nodeset && state->exprs[0]->type != T_NODESET) {
2554 STATE_ERROR(state, PATHX_ETYPE);
2559 store_error(*pathx);
2560 return state->errcode;
2565 err->code = AUG_ENOMEM;
2566 return PATHX_ENOMEM;
2569 /*************************************************************************
2570 * Searching in the tree
2571 *************************************************************************/
2573 static bool step_matches(struct step *step, struct tree *tree) {
2574 if (step->name == NULL) {
2575 return step->axis == ROOT || tree->label != NULL;
2577 return streqx(step->name, tree->label);
2581 static struct tree *tree_prev(struct tree *pos) {
2582 struct tree *node = NULL;
2583 if (pos != pos->parent->children) {
2584 for (node = pos->parent->children;
2591 /* When the first step doesn't begin with ROOT then use relative root context
2593 static struct tree *step_root(struct step *step, struct tree *ctx,
2594 struct tree *root_ctx) {
2595 struct tree *node = NULL;
2596 switch (step->axis) {
2602 case PRECEDING_SIBLING:
2603 case FOLLOWING_SIBLING:
2604 /* only use root_ctx when ctx is the absolute tree root */
2605 if (ctx == ctx->parent && root_ctx != NULL)
2611 case DESCENDANT_OR_SELF:
2622 static struct tree *step_first(struct step *step, struct tree *ctx) {
2623 struct tree *node = NULL;
2624 switch (step->axis) {
2626 case DESCENDANT_OR_SELF:
2631 node = ctx->children;
2638 while (ctx->parent != ctx)
2642 case PRECEDING_SIBLING:
2643 node = tree_prev(ctx);
2645 case FOLLOWING_SIBLING:
2653 if (step_matches(step, node))
2655 return step_next(step, ctx, node);
2658 static struct tree *step_next(struct step *step, struct tree *ctx,
2659 struct tree *node) {
2660 while (node != NULL) {
2661 switch (step->axis) {
2669 case DESCENDANT_OR_SELF:
2670 if (node->children != NULL) {
2671 node = node->children;
2673 while (node->next == NULL && node != ctx)
2674 node = node->parent;
2686 if (node->parent == node)
2689 node = node->parent;
2691 case PRECEDING_SIBLING:
2692 node = tree_prev(node);
2694 case FOLLOWING_SIBLING:
2700 if (node != NULL && step_matches(step, node))
2706 static struct value *pathx_eval(struct pathx *pathx) {
2707 struct state *state = pathx->state;
2708 state->ctx = pathx->origin;
2711 eval_expr(state->exprs[0], state);
2712 if (HAS_ERROR(state))
2715 if (state->values_used != 1) {
2716 STATE_ERROR(state, PATHX_EINTERNAL);
2719 return pop_value(state);
2722 struct tree *pathx_next(struct pathx *pathx) {
2723 if (pathx->node + 1 < pathx->nodeset->used)
2724 return pathx->nodeset->nodes[++pathx->node];
2728 /* Find the first node in TREE matching PATH. */
2729 struct tree *pathx_first(struct pathx *pathx) {
2730 if (pathx->nodeset == NULL) {
2731 struct value *v = pathx_eval(pathx);
2733 if (HAS_ERROR(pathx->state))
2735 assert(v->tag == T_NODESET);
2736 pathx->nodeset = v->nodeset;
2739 if (pathx->nodeset->used == 0)
2742 return pathx->nodeset->nodes[0];
2748 /* Find a node in the tree that matches the longest prefix of PATH.
2750 * Return 1 if a node was found that exactly matches PATH, 0 if an incomplete
2751 * prefix matches, and -1 if more than one node in the tree match.
2753 * TMATCH is set to the tree node that matches, and SMATCH to the next step
2754 * after the one where TMATCH matched. If no node matches or multiple nodes
2755 * at the same depth match, TMATCH and SMATCH will be NULL. When exactly
2756 * one node matches, TMATCH will be that node, and SMATCH will be NULL.
2758 static int locpath_search(struct locpath_trace *lpt,
2759 struct tree **tmatch, struct step **smatch) {
2763 for (last=lpt->maxns; last >= 0 && lpt->ns[last]->used == 0; last--);
2765 *smatch = lpt->lp->steps;
2769 if (lpt->ns[last]->used > 1) {
2774 *tmatch = lpt->ns[last]->nodes[0];
2775 *smatch = lpt->lp->steps;
2776 for (int i=0; i < last; i++)
2777 *smatch = (*smatch)->next;
2779 for (int i=0; i < lpt->maxns; i++)
2780 free_nodeset(lpt->ns[i]);
2785 /* Expand the tree ROOT so that it contains all components of PATH. PATH
2786 * must have been initialized against ROOT by a call to PATH_FIND_ONE.
2788 * Return the first segment that was created by this operation, or NULL on
2791 int pathx_expand_tree(struct pathx *path, struct tree **tree) {
2793 struct step *step = NULL;
2794 struct locpath_trace lpt;
2795 struct tree *first_child = NULL;
2796 struct value *v = NULL;
2799 path->state->locpath_trace = &lpt;
2800 v = pathx_eval(path);
2801 path->state->locpath_trace = NULL;
2802 if (HAS_ERROR(path->state))
2805 if (lpt.maxns == 0) {
2806 if (v->tag != T_NODESET || v->nodeset->used == 0) {
2807 STATE_ERROR(path->state, PATHX_ENOMATCH);
2810 if (v->nodeset->used > 1)
2812 *tree = v->nodeset->nodes[0];
2816 *tree = path->origin;
2817 r = locpath_search(&lpt, tree, &step);
2819 STATE_ERROR(path->state, PATHX_EMMATCH);
2826 struct tree *parent = *tree;
2828 parent = path->origin;
2830 list_for_each(s, step) {
2831 if (s->name == NULL || s->axis != CHILD)
2833 struct tree *t = make_tree(strdup(s->name), NULL, parent, NULL);
2834 if (first_child == NULL)
2836 if (t == NULL || t->label == NULL)
2838 list_append(parent->children, t);
2842 while (first_child->children != NULL)
2843 first_child = first_child->children;
2845 *tree = first_child;
2849 if (first_child != NULL) {
2850 list_remove(first_child, first_child->parent->children);
2851 free_tree(first_child);
2858 int pathx_find_one(struct pathx *path, struct tree **tree) {
2859 *tree = pathx_first(path);
2860 if (HAS_ERROR(path->state))
2862 return path->nodeset->used;
2865 struct error *err_of_pathx(struct pathx *px) {
2866 return px->state->error;
2869 const char *pathx_error(struct pathx *path, const char **txt, int *pos) {
2870 int errcode = PATHX_ENOMEM;
2873 if (path->state->errcode < ARRAY_CARDINALITY(errcodes))
2874 errcode = path->state->errcode;
2876 errcode = PATHX_EINTERNAL;
2879 *txt = path->state->txt;
2882 *pos = path->state->pos - path->state->txt;
2884 return errcodes[errcode];
2890 static struct pathx_symtab
2891 *make_symtab(struct pathx_symtab *symtab, const char *name,
2892 struct value *value)
2894 struct pathx_symtab *new;
2901 if (ALLOC(new) < 0) {
2907 if (symtab == NULL) {
2910 new->next = symtab->next;
2916 void free_symtab(struct pathx_symtab *symtab) {
2918 while (symtab != NULL) {
2919 struct pathx_symtab *del = symtab;
2922 release_value(del->value);
2928 struct pathx_symtab *pathx_get_symtab(struct pathx *pathx) {
2929 return pathx->state->symtab;
2932 static int pathx_symtab_set(struct pathx_symtab **symtab,
2933 const char *name, struct value *v) {
2936 list_for_each(tab, *symtab) {
2937 if (STREQ(tab->name, name)) {
2938 release_value(tab->value);
2947 struct pathx_symtab *new = NULL;
2949 new = make_symtab(*symtab, name, v);
2959 int pathx_symtab_define(struct pathx_symtab **symtab,
2960 const char *name, struct pathx *px) {
2962 struct value *value = NULL, *v = NULL;
2963 struct state *state = px->state;
2965 value = pathx_eval(px);
2966 if (HAS_ERROR(px->state))
2975 value->tag = T_BOOLEAN;
2977 r = pathx_symtab_set(symtab, name, v);
2983 if (v->tag == T_NODESET)
2984 return v->nodeset->used;
2988 release_value(value);
2996 int pathx_symtab_undefine(struct pathx_symtab **symtab, const char *name) {
2997 struct pathx_symtab *del = NULL;
3000 del != NULL && !STREQ(del->name, name);
3004 list_remove(del, *symtab);
3009 int pathx_symtab_assign_tree(struct pathx_symtab **symtab,
3010 const char *name, struct tree *tree) {
3011 struct value *v = NULL;
3018 if (ALLOC(v->nodeset) < 0)
3020 if (ALLOC_N(v->nodeset->nodes, 1) < 0)
3022 v->nodeset->used = 1;
3023 v->nodeset->size = 1;
3024 v->nodeset->nodes[0] = tree;
3026 r = pathx_symtab_set(symtab, name, v);
3037 pathx_symtab_count(const struct pathx_symtab *symtab, const char *name) {
3038 struct value *v = lookup_var(name, symtab);
3040 if (v == NULL || v->tag != T_NODESET)
3043 return v->nodeset->used;
3047 pathx_symtab_get_tree(struct pathx_symtab *symtab,
3048 const char *name, int i) {
3049 struct value *v = lookup_var(name, symtab);
3052 if (v->tag != T_NODESET)
3054 if (i >= v->nodeset->used)
3056 return v->nodeset->nodes[i];
3059 void pathx_symtab_remove_descendants(struct pathx_symtab *symtab,
3060 const struct tree *tree) {
3061 list_for_each(tab, symtab) {
3062 if (tab->value->tag != T_NODESET)
3064 struct nodeset *ns = tab->value->nodeset;
3065 for (int i=0; i < ns->used;) {
3066 struct tree *t = ns->nodes[i];
3067 while (t != t->parent && t != tree)
3070 ns_remove(ns, i, 1);
3079 * indent-tabs-mode: nil