/*
* pathx.c: handling path expressions
*
- * Copyright (C) 2007-2011 David Lutterkort
+ * Copyright (C) 2007-2016 David Lutterkort
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
"following-sibling"
};
-static const char *const axis_sep = "::";
-
/* The characters that can follow a name in a location expression (aka path)
* The parser will assume that name (path component) is finished when it
* encounters any of these characters, unless they are escaped by preceding
*
* See parse_name for the gory details
*/
-static const char const name_follow[] = "][|/=()!,";
+static const char name_follow[] = "][|/=()!,";
/* Doubly linked list of location steps. Besides the information from the
* path expression, also contains information to iterate over a node set,
enum type tag;
union {
struct nodeset *nodeset; /* T_NODESET */
- int number; /* T_NUMBER */
+ int64_t number; /* T_NUMBER */
char *string; /* T_STRING */
bool boolval; /* T_BOOLEAN */
struct regexp *regexp; /* T_REGEXP */
struct { /* E_APP */
const struct func *func;
struct expr **args;
+ /* If fold is true, replace this function invocation
+ * with its value after the first time we evaluate this
+ * expression */
+ bool fold;
};
};
};
const char *name;
unsigned int arity;
enum type type;
+ bool pure; /* Result only depends on args */
const enum type *arg_types;
func_impl_t impl;
};
static void func_regexp_flag(struct state *state, int nargs);
static void func_glob(struct state *state, int nargs);
static void func_int(struct state *state, int nargs);
+static void func_not(struct state *state, int nargs);
-static const enum type const arg_types_nodeset[] = { T_NODESET };
-static const enum type const arg_types_string[] = { T_STRING };
-static const enum type const arg_types_bool[] = { T_BOOLEAN };
-static const enum type const arg_types_string_string[] = { T_STRING, T_STRING };
-static const enum type const arg_types_nodeset_string[] = { T_NODESET, T_STRING };
+static const enum type arg_types_nodeset[] = { T_NODESET };
+static const enum type arg_types_string[] = { T_STRING };
+static const enum type arg_types_bool[] = { T_BOOLEAN };
+static const enum type arg_types_string_string[] = { T_STRING, T_STRING };
+static const enum type arg_types_nodeset_string[] = { T_NODESET, T_STRING };
static const struct func builtin_funcs[] = {
{ .name = "last", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
- .impl = func_last },
+ .impl = func_last, .pure = false },
{ .name = "position", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
- .impl = func_position },
+ .impl = func_position, .pure = false },
{ .name = "label", .arity = 0, .type = T_STRING, .arg_types = NULL,
- .impl = func_label },
+ .impl = func_label, .pure = false },
{ .name = "count", .arity = 1, .type = T_NUMBER,
.arg_types = arg_types_nodeset,
- .impl = func_count },
+ .impl = func_count, .pure = false },
{ .name = "regexp", .arity = 1, .type = T_REGEXP,
.arg_types = arg_types_string,
- .impl = func_regexp },
+ .impl = func_regexp, .pure = true },
{ .name = "regexp", .arity = 1, .type = T_REGEXP,
.arg_types = arg_types_nodeset,
- .impl = func_regexp },
+ .impl = func_regexp, .pure = true },
{ .name = "regexp", .arity = 2, .type = T_REGEXP,
.arg_types = arg_types_string_string,
- .impl = func_regexp_flag },
+ .impl = func_regexp_flag, .pure = true },
{ .name = "regexp", .arity = 2, .type = T_REGEXP,
.arg_types = arg_types_nodeset_string,
- .impl = func_regexp_flag },
+ .impl = func_regexp_flag, .pure = true },
{ .name = "glob", .arity = 1, .type = T_REGEXP,
.arg_types = arg_types_string,
- .impl = func_glob },
+ .impl = func_glob, .pure = true },
{ .name = "glob", .arity = 1, .type = T_REGEXP,
.arg_types = arg_types_nodeset,
- .impl = func_glob },
+ .impl = func_glob, .pure = true },
{ .name = "int", .arity = 1, .type = T_NUMBER,
- .arg_types = arg_types_string, .impl = func_int },
+ .arg_types = arg_types_string, .impl = func_int, .pure = false },
{ .name = "int", .arity = 1, .type = T_NUMBER,
- .arg_types = arg_types_nodeset, .impl = func_int },
+ .arg_types = arg_types_nodeset, .impl = func_int, .pure = false },
{ .name = "int", .arity = 1, .type = T_NUMBER,
- .arg_types = arg_types_bool, .impl = func_int }
+ .arg_types = arg_types_bool, .impl = func_int, .pure = false },
+ { .name = "not", .arity = 1, .type = T_BOOLEAN,
+ .arg_types = arg_types_bool, .impl = func_not, .pure = true }
};
#define RET_ON_ERROR \
#define STATE_ENOMEM STATE_ERROR(state, PATHX_ENOMEM)
-#define ENOMEM_ON_NULL(state, v) \
- do { \
- if (v == NULL) { \
- STATE_ERROR(state, PATHX_ENOMEM); \
- return NULL; \
- } \
- } while (0);
-
/*
* Free the various data structures
*/
return result;
}
+/* Add NODE to NS if it is not in NS yet. This relies on the flag
+ * NODE->ADDED and care must be taken that NS_CLEAR_ADDED is called on NS
+ * as soon as we are done adding nodes to it.
+ */
static void ns_add(struct nodeset *ns, struct tree *node,
struct state *state) {
- for (int i=0; i < ns->used; i++)
- if (ns->nodes[i] == node)
- return;
+ if (node->added)
+ return;
if (ns->used >= ns->size) {
size_t size = 2 * ns->size;
if (size < 10) size = 10;
ns->size = size;
}
ns->nodes[ns->used] = node;
+ node->added = 1;
ns->used += 1;
}
+static void ns_clear_added(struct nodeset *ns) {
+ for (int i=0; i < ns->used; i++)
+ ns->nodes[i]->added = 0;
+}
+
static struct nodeset *
clone_nodeset(struct nodeset *ns, struct state *state)
{
return state->value_pool_used++;
}
-ATTRIBUTE_UNUSED
static value_ind_t clone_value(struct value *v, struct state *state) {
value_ind_t vind = make_value(v->tag, state);
RET0_ON_ERROR;
case T_STRING:
clone->string = strdup(v->string);
if (clone->string == NULL) {
- FREE(clone);
STATE_ENOMEM;
}
break;
push_value(vind, state);
}
+static void func_not(struct state *state, int nargs) {
+ ensure_arity(1, 1);
+ RET_ON_ERROR;
+
+ struct value *v = pop_value(state);
+ if (v->tag == T_BOOLEAN) {
+ push_boolean_value(! v->boolval, state);
+ }
+}
+
static struct regexp *
nodeset_as_regexp(struct info *info, struct nodeset *ns, int glob, int nocase) {
struct regexp *result = NULL;
RET_ON_ERROR;
for (int i=0; i < r->nodeset->used; i++) {
ns_add(res, r->nodeset->nodes[i], state);
- RET_ON_ERROR;
+ if (HAS_ERROR(state))
+ goto error;
}
state->value_pool[vind].nodeset = res;
push_value(vind, state);
+ error:
+ ns_clear_added(res);
}
static void eval_concat_string(struct state *state) {
return (state->ctx_pos == v->number);
case T_NODESET:
return v->nodeset->used > 0;
+ case T_STRING:
+ return streqv(state->ctx->value, v->string);
default:
assert(0);
return false;
}
}
-static void ns_remove(struct nodeset *ns, int ind) {
- memmove(ns->nodes + ind, ns->nodes + ind+1,
- sizeof(ns->nodes[0]) * (ns->used - (ind+1)));
- ns->used -= 1;
+/* Remove COUNT successive entries from NS. The first entry to remove is at
+ IND */
+static void ns_remove(struct nodeset *ns, int ind, int count) {
+ if (count < 1)
+ return;
+ memmove(ns->nodes + ind, ns->nodes + ind+count,
+ sizeof(ns->nodes[0]) * (ns->used - (ind+count)));
+ ns->used -= count;
}
/*
uint old_ctx_pos = state->ctx_pos;
for (int p=0; p < predicates->nexpr; p++) {
+ int first_bad = -1; /* The index of the first non-matching node */
state->ctx_len = ns->used;
state->ctx_pos = 1;
for (int i=0; i < ns->used; state->ctx_pos++) {
state->ctx = ns->nodes[i];
bool match = eval_pred(predicates->exprs[p], state);
RET_ON_ERROR;
+ /* We remove non-matching nodes from NS in batches; this logic
+ * makes sure that we only call ns_remove at the end of a run
+ * of non-matching nodes
+ */
if (match) {
- i+=1;
+ if (first_bad >= 0) {
+ ns_remove(ns, first_bad, i - first_bad);
+ i = first_bad + 1;
+ } else {
+ i += 1;
+ }
+ first_bad = -1;
} else {
- ns_remove(ns, i);
+ if (first_bad == -1)
+ first_bad = i;
+ i += 1;
}
}
+ if (first_bad >= 0) {
+ ns_remove(ns, first_bad, ns->used - first_bad);
+ }
}
state->ctx = old_ctx;
state->ctx_len = old_ctx_len;
}
+/* Return true if PRED is solely the predicate '[n]' as in 'foo[17]' */
+static bool position_pred(struct pred *pred) {
+ return pred != NULL &&
+ pred->nexpr == 1 &&
+ pred->exprs[0]->tag == E_VALUE &&
+ pred->exprs[0]->type == T_NUMBER;
+}
+
+/* Return the tree node at the position implied by STEP->PREDICATES. It is
+ assumed and required that STEP->PREDICATES is actually a
+ POSITION_PRED.
+
+ This method hand-optimizes the important case of a path expression like
+ 'service[42]'
+*/
+static struct tree *position_filter(struct nodeset *ns,
+ struct step *step,
+ struct state *state) {
+ int value_ind = step->predicates->exprs[0]->value_ind;
+ int number = state->value_pool[value_ind].number;
+
+ int pos = 1;
+ for (int i=0; i < ns->used; i++) {
+ for (struct tree *node = step_first(step, ns->nodes[i]);
+ node != NULL;
+ node = step_next(step, ns->nodes[i], node), pos++) {
+ if (pos == number)
+ return node;
+ }
+ }
+
+ return NULL;
+}
+
/* Return an array of nodesets, one for each step in the locpath.
*
* On return, (*NS)[0] will contain state->ctx, and (*NS)[*MAXNS] will
const struct nodeset *root,
struct state *state) {
struct tree *old_ctx = state->ctx;
-
*maxns = 0;
+
+ ensure(lp != NULL, state);
+
*ns = NULL;
list_for_each(step, lp->steps)
*maxns += 1;
if (root == NULL) {
struct step *first_step = NULL;
- if (lp != NULL)
- first_step = lp->steps;
+ first_step = lp->steps;
struct tree *root_tree;
root_tree = step_root(first_step, state->ctx, state->root_ctx);
ns_add((*ns)[0], root_tree, state);
+ ns_clear_added((*ns)[0]);
} else {
for (int i=0; i < root->used; i++)
ns_add((*ns)[0], root->nodes[i], state);
+ ns_clear_added((*ns)[0]);
}
if (HAS_ERROR(state))
list_for_each(step, lp->steps) {
struct nodeset *work = (*ns)[cur_ns];
struct nodeset *next = (*ns)[cur_ns + 1];
- for (int i=0; i < work->used; i++) {
- for (struct tree *node = step_first(step, work->nodes[i]);
- node != NULL;
- node = step_next(step, work->nodes[i], node))
+ if (position_pred(step->predicates)) {
+ struct tree *node = position_filter(work, step, state);
+ if (node) {
ns_add(next, node, state);
+ ns_clear_added(next);
+ }
+ } else {
+ for (int i=0; i < work->used; i++) {
+ for (struct tree *node = step_first(step, work->nodes[i]);
+ node != NULL;
+ node = step_next(step, work->nodes[i], node)) {
+ ns_add(next, node, state);
+ }
+ }
+ ns_clear_added(next);
+ ns_filter(next, step->predicates, state);
+ if (HAS_ERROR(state))
+ goto error;
}
- ns_filter(next, step->predicates, state);
- if (HAS_ERROR(state))
- goto error;
cur_ns += 1;
}
}
}
-static struct value *lookup_var(const char *ident, struct state *state) {
- list_for_each(tab, state->symtab) {
+static struct value *lookup_var(const char *ident,
+ const struct pathx_symtab *symtab) {
+ list_for_each(tab, symtab) {
if (STREQ(ident, tab->name))
return tab->value;
}
}
static void eval_var(struct expr *expr, struct state *state) {
- struct value *v = lookup_var(expr->ident, state);
+ struct value *v = lookup_var(expr->ident, state->symtab);
value_ind_t vind = clone_value(v, state);
RET_ON_ERROR;
push_value(vind, state);
break;
case E_APP:
eval_app(expr, state);
+ if (expr->fold) {
+ /* Do constant folding: replace the function application with
+ * a reference to the value that resulted from evaluating it */
+ for (int i=0; i < expr->func->arity; i++)
+ free_expr(expr->args[i]);
+ free(expr->args);
+ value_ind_t vind = state->values_used - 1;
+ expr->tag = E_VALUE;
+ expr->value_ind = state->values[vind];
+ }
break;
default:
assert(0);
check_expr(e, state);
RET_ON_ERROR;
if (e->type != T_NODESET && e->type != T_NUMBER &&
- e->type != T_BOOLEAN) {
+ e->type != T_BOOLEAN && e->type != T_STRING) {
STATE_ERROR(state, PATHX_ETYPE);
return;
}
if (f < ARRAY_CARDINALITY(builtin_funcs)) {
expr->func = builtin_funcs + f;
expr->type = expr->func->type;
+ expr->fold = expr->func->pure;
+ if (expr->fold) {
+ /* We only do constant folding for invocations of pure functions
+ * whose arguments are literal values. That misses opportunities
+ * for constant folding, e.g., "regexp('foo' + 'bar')" but is
+ * a bit simpler than doing full tracking of constants
+ */
+ for (int i=0; i < expr->func->arity; i++) {
+ if (expr->args[i]->tag != E_VALUE)
+ expr->fold = false;
+ }
+ }
} else {
STATE_ERROR(state, PATHX_ETYPE);
}
}
static void check_var(struct expr *expr, struct state *state) {
- struct value *v = lookup_var(expr->ident, state);
+ struct value *v = lookup_var(expr->ident, state->symtab);
if (v == NULL) {
STATE_ERROR(state, PATHX_ENOVAR);
return;
return 0;
}
+/* Return true if POS is preceded by an odd number of backslashes, i.e., if
+ * POS is escaped. Stop the search when we get to START */
+static bool backslash_escaped(const char *pos, const char *start) {
+ bool result=false;
+ while (pos-- > start && *pos == '\\') {
+ result = !result;
+ }
+ return result;
+}
+
/*
* NameNoWS ::= [^][|/\= \t\n] | \\.
* NameWS ::= [^][|/\=] | \\.
const char *s = state->pos;
char *result;
+ /* Advance state->pos until it points to the first character that is
+ * not part of a name. */
while (*state->pos != '\0' && strchr(name_follow, *state->pos) == NULL) {
- /* This is a hack: since we allow spaces in names, we need to avoid
- * gobbling up stuff that is in follow(Name), e.g. 'or' so that
- * things like [name1 or name2] still work.
- */
+ /* Since we allow spaces in names, we need to avoid gobbling up
+ * stuff that is in follow(Name), e.g. 'or' so that things like
+ * [name1 or name2] still work. In other words, we'll parse 'x frob
+ * y' as one name, but for 'x or y', we consider 'x' a name in its
+ * own right. */
if (STREQLEN(state->pos, " or ", strlen(" or ")) ||
STREQLEN(state->pos, " and ", strlen(" and ")))
break;
state->pos += 1;
}
- /* Strip trailing white space */
+ /* Strip trailing white space. Make sure we respect escaped whitespace
+ * and don't strip it as in "x\\ " */
if (state->pos > s) {
state->pos -= 1;
- while (isspace(*state->pos) && state->pos >= s)
+ while (isspace(*state->pos) && state->pos > s
+ && !backslash_escaped(state->pos, s))
state->pos -= 1;
state->pos += 1;
}
if (*state->pos == '/') {
state->pos += 1;
step = make_step(DESCENDANT_OR_SELF, state);
- ENOMEM_ON_NULL(state, step);
+ if (step == NULL) {
+ STATE_ENOMEM;
+ goto error;
+ }
list_append(locpath->steps, step);
}
step = parse_step(state);
state->pos += 1;
locpath = parse_relative_location_path(state);
if (HAS_ERROR(state))
- return;
+ goto error;
struct step *step = make_step(DESCENDANT_OR_SELF, state);
if (HAS_ERROR(state))
goto error;
goto err_nomem;
}
struct step *step = make_step(ROOT, state);
- if (HAS_ERROR(state))
+ if (HAS_ERROR(state)) {
+ free_step(step);
goto error;
+ }
list_cons(locpath->steps, step);
}
} else {
*************************************************************************/
static bool step_matches(struct step *step, struct tree *tree) {
- return (step->name == NULL || streqx(step->name, tree->label));
+ if (step->name == NULL) {
+ return step->axis == ROOT || tree->label != NULL;
+ } else {
+ return streqx(step->name, tree->label);
+ }
}
static struct tree *tree_prev(struct tree *pos) {
return -1;
}
+int
+pathx_symtab_count(const struct pathx_symtab *symtab, const char *name) {
+ struct value *v = lookup_var(name, symtab);
+
+ if (v == NULL || v->tag != T_NODESET)
+ return -1;
+
+ return v->nodeset->used;
+}
+
+struct tree *
+pathx_symtab_get_tree(struct pathx_symtab *symtab,
+ const char *name, int i) {
+ struct value *v = lookup_var(name, symtab);
+ if (v == NULL)
+ return NULL;
+ if (v->tag != T_NODESET)
+ return NULL;
+ if (i >= v->nodeset->used)
+ return NULL;
+ return v->nodeset->nodes[i];
+}
+
void pathx_symtab_remove_descendants(struct pathx_symtab *symtab,
const struct tree *tree) {
list_for_each(tab, symtab) {
while (t != t->parent && t != tree)
t = t->parent;
if (t == tree)
- ns_remove(ns, i);
+ ns_remove(ns, i, 1);
else
i += 1;
}