Imported Upstream version 1.12.0

[platform/upstream/augeas.git] / src / pathx.c
diff --git a/src/pathx.c b/src/pathx.c

index 8d8dbbb..35be7a2 100644 (file)
--- a/src/pathx.c
+++ b/src/pathx.c
@@ -1,7 +1,7 @@
  /*
   * pathx.c: handling path expressions
   *
- * Copyright (C) 2007-2011 David Lutterkort
+ * Copyright (C) 2007-2016 David Lutterkort
   *
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
@@ -121,8 +121,6 @@ static const char *const axis_names[] = {
      "following-sibling"
  };
  
-static const char *const axis_sep = "::";
-
  /* The characters that can follow a name in a location expression (aka path)
   * The parser will assume that name (path component) is finished when it
   * encounters any of these characters, unless they are escaped by preceding
@@ -130,7 +128,7 @@ static const char *const axis_sep = "::";
   *
   * See parse_name for the gory details
   */
-static const char const name_follow[] = "][|/=()!,";
+static const char name_follow[] = "][|/=()!,";
  
  /* Doubly linked list of location steps. Besides the information from the
   * path expression, also contains information to iterate over a node set,
@@ -184,7 +182,7 @@ struct value {
      enum type tag;
      union {
          struct nodeset  *nodeset;     /* T_NODESET */
-        int              number;      /* T_NUMBER  */
+        int64_t          number;      /* T_NUMBER  */
          char            *string;      /* T_STRING  */
          bool             boolval;     /* T_BOOLEAN */
          struct regexp   *regexp;      /* T_REGEXP  */
@@ -210,6 +208,10 @@ struct expr {
          struct {                       /* E_APP */
              const struct func *func;
              struct expr       **args;
+            /* If fold is true, replace this function invocation
+             * with its value after the first time we evaluate this
+             * expression */
+            bool              fold;
          };
      };
  };
@@ -284,6 +286,7 @@ struct func {
      const char      *name;
      unsigned int     arity;
      enum type        type;
+    bool             pure;      /* Result only depends on args */
      const enum type *arg_types;
      func_impl_t      impl;
  };
@@ -296,47 +299,50 @@ static void func_regexp(struct state *state, int nargs);
  static void func_regexp_flag(struct state *state, int nargs);
  static void func_glob(struct state *state, int nargs);
  static void func_int(struct state *state, int nargs);
+static void func_not(struct state *state, int nargs);
  
-static const enum type const arg_types_nodeset[] = { T_NODESET };
-static const enum type const arg_types_string[] = { T_STRING };
-static const enum type const arg_types_bool[] = { T_BOOLEAN };
-static const enum type const arg_types_string_string[] = { T_STRING, T_STRING };
-static const enum type const arg_types_nodeset_string[] = { T_NODESET, T_STRING };
+static const enum type arg_types_nodeset[] = { T_NODESET };
+static const enum type arg_types_string[] = { T_STRING };
+static const enum type arg_types_bool[] = { T_BOOLEAN };
+static const enum type arg_types_string_string[] = { T_STRING, T_STRING };
+static const enum type arg_types_nodeset_string[] = { T_NODESET, T_STRING };
  
  static const struct func builtin_funcs[] = {
      { .name = "last", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
-      .impl = func_last },
+      .impl = func_last, .pure = false },
      { .name = "position", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
-      .impl = func_position },
+      .impl = func_position, .pure = false },
      { .name = "label", .arity = 0, .type = T_STRING, .arg_types = NULL,
-      .impl = func_label },
+      .impl = func_label, .pure = false },
      { .name = "count", .arity = 1, .type = T_NUMBER,
        .arg_types = arg_types_nodeset,
-      .impl = func_count },
+      .impl = func_count, .pure = false },
      { .name = "regexp", .arity = 1, .type = T_REGEXP,
        .arg_types = arg_types_string,
-      .impl = func_regexp },
+      .impl = func_regexp, .pure = true },
      { .name = "regexp", .arity = 1, .type = T_REGEXP,
        .arg_types = arg_types_nodeset,
-      .impl = func_regexp },
+      .impl = func_regexp, .pure = true },
      { .name = "regexp", .arity = 2, .type = T_REGEXP,
        .arg_types = arg_types_string_string,
-      .impl = func_regexp_flag },
+      .impl = func_regexp_flag, .pure = true },
      { .name = "regexp", .arity = 2, .type = T_REGEXP,
        .arg_types = arg_types_nodeset_string,
-      .impl = func_regexp_flag },
+      .impl = func_regexp_flag, .pure = true },
      { .name = "glob", .arity = 1, .type = T_REGEXP,
        .arg_types = arg_types_string,
-      .impl = func_glob },
+      .impl = func_glob, .pure = true },
      { .name = "glob", .arity = 1, .type = T_REGEXP,
        .arg_types = arg_types_nodeset,
-      .impl = func_glob },
+      .impl = func_glob, .pure = true },
      { .name = "int", .arity = 1, .type = T_NUMBER,
-      .arg_types = arg_types_string, .impl = func_int },
+      .arg_types = arg_types_string, .impl = func_int, .pure = false },
      { .name = "int", .arity = 1, .type = T_NUMBER,
-      .arg_types = arg_types_nodeset, .impl = func_int },
+      .arg_types = arg_types_nodeset, .impl = func_int, .pure = false },
      { .name = "int", .arity = 1, .type = T_NUMBER,
-      .arg_types = arg_types_bool, .impl = func_int }
+      .arg_types = arg_types_bool, .impl = func_int, .pure = false },
+    { .name = "not", .arity = 1, .type = T_BOOLEAN,
+      .arg_types = arg_types_bool, .impl = func_not, .pure = true }
  };
  
  #define RET_ON_ERROR                                                    \
@@ -356,14 +362,6 @@ static const struct func builtin_funcs[] = {
  
  #define STATE_ENOMEM STATE_ERROR(state, PATHX_ENOMEM)
  
-#define ENOMEM_ON_NULL(state, v)                                        \
-    do {                                                                \
-        if (v == NULL) {                                                \
-            STATE_ERROR(state, PATHX_ENOMEM);                           \
-            return NULL;                                                \
-        }                                                               \
-    } while (0);
-
  /*
   * Free the various data structures
   */
@@ -495,11 +493,14 @@ static struct nodeset *make_nodeset(struct state *state) {
      return result;
  }
  
+/* Add NODE to NS if it is not in NS yet. This relies on the flag
+ * NODE->ADDED and care must be taken that NS_CLEAR_ADDED is called on NS
+ * as soon as we are done adding nodes to it.
+ */
  static void ns_add(struct nodeset *ns, struct tree *node,
                     struct state *state) {
-    for (int i=0; i < ns->used; i++)
-        if (ns->nodes[i] == node)
-            return;
+    if (node->added)
+        return;
      if (ns->used >= ns->size) {
          size_t size = 2 * ns->size;
          if (size < 10) size = 10;
@@ -508,9 +509,15 @@ static void ns_add(struct nodeset *ns, struct tree *node,
          ns->size = size;
      }
      ns->nodes[ns->used] = node;
+    node->added = 1;
      ns->used += 1;
  }
  
+static void ns_clear_added(struct nodeset *ns) {
+    for (int i=0; i < ns->used; i++)
+        ns->nodes[i]->added = 0;
+}
+
  static struct nodeset *
  clone_nodeset(struct nodeset *ns, struct state *state)
  {
@@ -554,7 +561,6 @@ static value_ind_t make_value(enum type tag, struct state *state) {
      return state->value_pool_used++;
  }
  
-ATTRIBUTE_UNUSED
  static value_ind_t clone_value(struct value *v, struct state *state) {
      value_ind_t vind = make_value(v->tag, state);
      RET0_ON_ERROR;
@@ -570,7 +576,6 @@ static value_ind_t clone_value(struct value *v, struct state *state) {
      case T_STRING:
          clone->string = strdup(v->string);
          if (clone->string == NULL) {
-            FREE(clone);
              STATE_ENOMEM;
          }
          break;
@@ -719,6 +724,16 @@ static void func_int(struct state *state, int nargs) {
      push_value(vind, state);
  }
  
+static void func_not(struct state *state, int nargs) {
+    ensure_arity(1, 1);
+    RET_ON_ERROR;
+
+    struct value *v = pop_value(state);
+    if (v->tag == T_BOOLEAN) {
+        push_boolean_value(! v->boolval, state);
+    }
+}
+
  static struct regexp *
  nodeset_as_regexp(struct info *info, struct nodeset *ns, int glob, int nocase) {
      struct regexp *result = NULL;
@@ -1003,10 +1018,13 @@ static void eval_union(struct state *state) {
      RET_ON_ERROR;
      for (int i=0; i < r->nodeset->used; i++) {
          ns_add(res, r->nodeset->nodes[i], state);
-        RET_ON_ERROR;
+        if (HAS_ERROR(state))
+            goto error;
      }
      state->value_pool[vind].nodeset = res;
      push_value(vind, state);
+ error:
+    ns_clear_added(res);
  }
  
  static void eval_concat_string(struct state *state) {
@@ -1140,16 +1158,22 @@ static bool eval_pred(struct expr *expr, struct state *state) {
          return (state->ctx_pos == v->number);
      case T_NODESET:
          return v->nodeset->used > 0;
+    case T_STRING:
+        return streqv(state->ctx->value, v->string);
      default:
          assert(0);
          return false;
      }
  }
  
-static void ns_remove(struct nodeset *ns, int ind) {
-    memmove(ns->nodes + ind, ns->nodes + ind+1,
-            sizeof(ns->nodes[0]) * (ns->used - (ind+1)));
-    ns->used -= 1;
+/* Remove COUNT successive entries from NS. The first entry to remove is at
+   IND */
+static void ns_remove(struct nodeset *ns, int ind, int count) {
+    if (count < 1)
+        return;
+    memmove(ns->nodes + ind, ns->nodes + ind+count,
+            sizeof(ns->nodes[0]) * (ns->used - (ind+count)));
+    ns->used -= count;
  }
  
  /*
@@ -1165,18 +1189,34 @@ static void ns_filter(struct nodeset *ns, struct pred *predicates,
      uint old_ctx_pos = state->ctx_pos;
  
      for (int p=0; p < predicates->nexpr; p++) {
+        int first_bad = -1;  /* The index of the first non-matching node */
          state->ctx_len = ns->used;
          state->ctx_pos = 1;
          for (int i=0; i < ns->used; state->ctx_pos++) {
              state->ctx = ns->nodes[i];
              bool match = eval_pred(predicates->exprs[p], state);
              RET_ON_ERROR;
+            /* We remove non-matching nodes from NS in batches; this logic
+             * makes sure that we only call ns_remove at the end of a run
+             * of non-matching nodes
+             */
              if (match) {
-                i+=1;
+                if (first_bad >= 0) {
+                    ns_remove(ns, first_bad, i - first_bad);
+                    i = first_bad + 1;
+                } else {
+                    i += 1;
+                }
+                first_bad = -1;
              } else {
-                ns_remove(ns, i);
+                if (first_bad == -1)
+                    first_bad = i;
+                i += 1;
              }
          }
+        if (first_bad >= 0) {
+            ns_remove(ns, first_bad, ns->used - first_bad);
+        }
      }
  
      state->ctx = old_ctx;
@@ -1184,6 +1224,40 @@ static void ns_filter(struct nodeset *ns, struct pred *predicates,
      state->ctx_len = old_ctx_len;
  }
  
+/* Return true if PRED is solely the predicate '[n]' as in 'foo[17]' */
+static bool position_pred(struct pred *pred) {
+    return pred != NULL &&
+        pred->nexpr == 1 &&
+        pred->exprs[0]->tag == E_VALUE &&
+        pred->exprs[0]->type == T_NUMBER;
+}
+
+/* Return the tree node at the position implied by STEP->PREDICATES. It is
+   assumed and required that STEP->PREDICATES is actually a
+   POSITION_PRED.
+
+   This method hand-optimizes the important case of a path expression like
+   'service[42]'
+*/
+static struct tree *position_filter(struct nodeset *ns,
+                                    struct step *step,
+                                    struct state *state) {
+    int value_ind = step->predicates->exprs[0]->value_ind;
+    int number = state->value_pool[value_ind].number;
+
+    int pos = 1;
+    for (int i=0; i < ns->used; i++) {
+        for (struct tree *node = step_first(step, ns->nodes[i]);
+             node != NULL;
+             node = step_next(step, ns->nodes[i], node), pos++) {
+            if (pos == number)
+                return node;
+        }
+    }
+
+    return NULL;
+}
+
  /* Return an array of nodesets, one for each step in the locpath.
   *
   * On return, (*NS)[0] will contain state->ctx, and (*NS)[*MAXNS] will
@@ -1194,8 +1268,10 @@ static void ns_from_locpath(struct locpath *lp, uint *maxns,
                              const struct nodeset *root,
                              struct state *state) {
      struct tree *old_ctx = state->ctx;
-
      *maxns = 0;
+
+    ensure(lp != NULL, state);
+
      *ns = NULL;
      list_for_each(step, lp->steps)
          *maxns += 1;
@@ -1211,15 +1287,16 @@ static void ns_from_locpath(struct locpath *lp, uint *maxns,
  
      if (root == NULL) {
          struct step *first_step = NULL;
-        if (lp != NULL)
-            first_step = lp->steps;
+        first_step = lp->steps;
  
          struct tree *root_tree;
          root_tree = step_root(first_step, state->ctx, state->root_ctx);
          ns_add((*ns)[0], root_tree, state);
+        ns_clear_added((*ns)[0]);
      } else {
          for (int i=0; i < root->used; i++)
              ns_add((*ns)[0], root->nodes[i], state);
+        ns_clear_added((*ns)[0]);
      }
  
      if (HAS_ERROR(state))
@@ -1229,15 +1306,25 @@ static void ns_from_locpath(struct locpath *lp, uint *maxns,
      list_for_each(step, lp->steps) {
          struct nodeset *work = (*ns)[cur_ns];
          struct nodeset *next = (*ns)[cur_ns + 1];
-        for (int i=0; i < work->used; i++) {
-            for (struct tree *node = step_first(step, work->nodes[i]);
-                 node != NULL;
-                 node = step_next(step, work->nodes[i], node))
+        if (position_pred(step->predicates)) {
+            struct tree *node = position_filter(work, step, state);
+            if (node) {
                  ns_add(next, node, state);
+                ns_clear_added(next);
+            }
+        } else {
+            for (int i=0; i < work->used; i++) {
+                for (struct tree *node = step_first(step, work->nodes[i]);
+                     node != NULL;
+                     node = step_next(step, work->nodes[i], node)) {
+                    ns_add(next, node, state);
+                }
+            }
+            ns_clear_added(next);
+            ns_filter(next, step->predicates, state);
+            if (HAS_ERROR(state))
+                goto error;
          }
-        ns_filter(next, step->predicates, state);
-        if (HAS_ERROR(state))
-            goto error;
          cur_ns += 1;
      }
  
@@ -1294,8 +1381,9 @@ static void eval_filter(struct expr *expr, struct state *state) {
      }
  }
  
-static struct value *lookup_var(const char *ident, struct state *state) {
-    list_for_each(tab, state->symtab) {
+static struct value *lookup_var(const char *ident,
+                                const struct pathx_symtab *symtab) {
+    list_for_each(tab, symtab) {
          if (STREQ(ident, tab->name))
              return tab->value;
      }
@@ -1303,7 +1391,7 @@ static struct value *lookup_var(const char *ident, struct state *state) {
  }
  
  static void eval_var(struct expr *expr, struct state *state) {
-    struct value *v = lookup_var(expr->ident, state);
+    struct value *v = lookup_var(expr->ident, state->symtab);
      value_ind_t vind = clone_value(v, state);
      RET_ON_ERROR;
      push_value(vind, state);
@@ -1326,6 +1414,16 @@ static void eval_expr(struct expr *expr, struct state *state) {
          break;
      case E_APP:
          eval_app(expr, state);
+        if (expr->fold) {
+            /* Do constant folding: replace the function application with
+             * a reference to the value that resulted from evaluating it */
+            for (int i=0; i < expr->func->arity; i++)
+                free_expr(expr->args[i]);
+            free(expr->args);
+            value_ind_t vind = state->values_used - 1;
+            expr->tag = E_VALUE;
+            expr->value_ind = state->values[vind];
+        }
          break;
      default:
          assert(0);
@@ -1353,7 +1451,7 @@ static void check_preds(struct pred *pred, struct state *state) {
          check_expr(e, state);
          RET_ON_ERROR;
          if (e->type != T_NODESET && e->type != T_NUMBER &&
-            e->type != T_BOOLEAN) {
+            e->type != T_BOOLEAN && e->type != T_STRING) {
              STATE_ERROR(state, PATHX_ETYPE);
              return;
          }
@@ -1410,6 +1508,18 @@ static void check_app(struct expr *expr, struct state *state) {
      if (f < ARRAY_CARDINALITY(builtin_funcs)) {
          expr->func = builtin_funcs + f;
          expr->type = expr->func->type;
+        expr->fold = expr->func->pure;
+        if (expr->fold) {
+            /* We only do constant folding for invocations of pure functions
+             * whose arguments are literal values. That misses opportunities
+             * for constant folding, e.g., "regexp('foo' + 'bar')" but is
+             * a bit simpler than doing full tracking of constants
+             */
+            for (int i=0; i < expr->func->arity; i++) {
+                if (expr->args[i]->tag != E_VALUE)
+                    expr->fold = false;
+            }
+        }
      } else {
          STATE_ERROR(state, PATHX_ETYPE);
      }
@@ -1498,7 +1608,7 @@ static void check_binary(struct expr *expr, struct state *state) {
  }
  
  static void check_var(struct expr *expr, struct state *state) {
-    struct value *v = lookup_var(expr->ident, state);
+    struct value *v = lookup_var(expr->ident, state->symtab);
      if (v == NULL) {
          STATE_ERROR(state, PATHX_ENOVAR);
          return;
@@ -1643,6 +1753,16 @@ int pathx_escape_name(const char *in, char **out) {
      return 0;
  }
  
+/* Return true if POS is preceded by an odd number of backslashes, i.e., if
+ * POS is escaped. Stop the search when we get to START */
+static bool backslash_escaped(const char *pos, const char *start) {
+    bool result=false;
+    while (pos-- > start && *pos == '\\') {
+        result = !result;
+    }
+    return result;
+}
+
  /*
   * NameNoWS ::= [^][|/\= \t\n] | \\.
   * NameWS   ::= [^][|/\=] | \\.
@@ -1652,11 +1772,14 @@ static char *parse_name(struct state *state) {
      const char *s = state->pos;
      char *result;
  
+    /* Advance state->pos until it points to the first character that is
+     * not part of a name. */
      while (*state->pos != '\0' && strchr(name_follow, *state->pos) == NULL) {
-        /* This is a hack: since we allow spaces in names, we need to avoid
-         * gobbling up stuff that is in follow(Name), e.g. 'or' so that
-         * things like [name1 or name2] still work.
-         */
+        /* Since we allow spaces in names, we need to avoid gobbling up
+         * stuff that is in follow(Name), e.g. 'or' so that things like
+         * [name1 or name2] still work. In other words, we'll parse 'x frob
+         * y' as one name, but for 'x or y', we consider 'x' a name in its
+         * own right. */
          if (STREQLEN(state->pos, " or ", strlen(" or ")) ||
              STREQLEN(state->pos, " and ", strlen(" and ")))
              break;
@@ -1671,10 +1794,12 @@ static char *parse_name(struct state *state) {
          state->pos += 1;
      }
  
-    /* Strip trailing white space */
+    /* Strip trailing white space. Make sure we respect escaped whitespace
+     * and don't strip it as in "x\\ " */
      if (state->pos > s) {
          state->pos -= 1;
-        while (isspace(*state->pos) && state->pos >= s)
+        while (isspace(*state->pos) && state->pos > s
+               && !backslash_escaped(state->pos, s))
              state->pos -= 1;
          state->pos += 1;
      }
@@ -1837,7 +1962,10 @@ parse_relative_location_path(struct state *state) {
          if (*state->pos == '/') {
              state->pos += 1;
              step = make_step(DESCENDANT_OR_SELF, state);
-            ENOMEM_ON_NULL(state, step);
+            if (step == NULL) {
+                STATE_ENOMEM;
+                goto error;
+            }
              list_append(locpath->steps, step);
          }
          step = parse_step(state);
@@ -1870,7 +1998,7 @@ static void parse_location_path(struct state *state) {
              state->pos += 1;
              locpath = parse_relative_location_path(state);
              if (HAS_ERROR(state))
-                return;
+                goto error;
              struct step *step = make_step(DESCENDANT_OR_SELF, state);
              if (HAS_ERROR(state))
                  goto error;
@@ -1883,8 +2011,10 @@ static void parse_location_path(struct state *state) {
                      goto err_nomem;
              }
              struct step *step = make_step(ROOT, state);
-            if (HAS_ERROR(state))
+            if (HAS_ERROR(state)) {
+                free_step(step);
                  goto error;
+            }
              list_cons(locpath->steps, step);
          }
      } else {
@@ -2468,7 +2598,11 @@ int pathx_parse(const struct tree *tree,
   *************************************************************************/
  
  static bool step_matches(struct step *step, struct tree *tree) {
-    return (step->name == NULL || streqx(step->name, tree->label));
+    if (step->name == NULL) {
+        return step->axis == ROOT || tree->label != NULL;
+    } else {
+        return streqx(step->name, tree->label);
+    }
  }
  
  static struct tree *tree_prev(struct tree *pos) {
@@ -2926,6 +3060,29 @@ int pathx_symtab_assign_tree(struct pathx_symtab **symtab,
      return -1;
  }
  
+int
+pathx_symtab_count(const struct pathx_symtab *symtab, const char *name) {
+    struct value *v = lookup_var(name, symtab);
+
+    if (v == NULL || v->tag != T_NODESET)
+        return -1;
+
+    return v->nodeset->used;
+}
+
+struct tree *
+pathx_symtab_get_tree(struct pathx_symtab *symtab,
+                      const char *name, int i) {
+    struct value *v = lookup_var(name, symtab);
+    if (v == NULL)
+        return NULL;
+    if (v->tag != T_NODESET)
+        return NULL;
+    if (i >= v->nodeset->used)
+        return NULL;
+    return v->nodeset->nodes[i];
+}
+
  void pathx_symtab_remove_descendants(struct pathx_symtab *symtab,
                                       const struct tree *tree) {
      list_for_each(tab, symtab) {
@@ -2937,7 +3094,7 @@ void pathx_symtab_remove_descendants(struct pathx_symtab *symtab,
              while (t != t->parent && t != tree)
                  t = t->parent;
              if (t == tree)
-                ns_remove(ns, i);
+                ns_remove(ns, i, 1);
              else
                  i += 1;
          }