src/pathx.c

   1 /*
   2  * pathx.c: handling path expressions
   3  *
   4  * Copyright (C) 2007-2016 David Lutterkort
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
  19  *
  20  * Author: David Lutterkort <dlutter@redhat.com>
  21  */
  22
  23 #include <config.h>
  24 #include <internal.h>
  25 #include <stdint.h>
  26 #include <stdbool.h>
  27 #include <memory.h>
  28 #include <ctype.h>
  29
  30 #include "ref.h"
  31 #include "regexp.h"
  32 #include "errcode.h"
  33
  34 static const char *const errcodes[] = {
  35     "no error",
  36     "empty name",
  37     "illegal string literal",
  38     "illegal number",
  39     "string missing ending ' or \"",
  40     "expected '='",
  41     "allocation failed",
  42     "unmatched '['",
  43     "unmatched '('",
  44     "expected a '/'",
  45     "internal error",                             /* PATHX_EINTERNAL */
  46     "type error",                                 /* PATHX_ETYPE */
  47     "undefined variable",                         /* PATHX_ENOVAR */
  48     "garbage at end of path expression",          /* PATHX_EEND */
  49     "no match for path expression",               /* PATHX_ENOMATCH */
  50     "wrong number of arguments in function call", /* PATHX_EARITY */
  51     "invalid regular expression",                 /* PATHX_EREGEXP */
  52     "too many matches",                           /* PATHX_EMMATCH */
  53     "wrong flag for regexp"                       /* PATHX_EREGEXPFLAG */
  54 };
  55
  56 /*
  57  * Path expressions are strings that use a notation modelled on XPath.
  58  */
  59
  60 enum type {
  61     T_NONE = 0,     /* Not a type */
  62     T_NODESET,
  63     T_BOOLEAN,
  64     T_NUMBER,
  65     T_STRING,
  66     T_REGEXP
  67 };
  68
  69 enum expr_tag {
  70     E_FILTER,
  71     E_BINARY,
  72     E_VALUE,
  73     E_VAR,
  74     E_APP
  75 };
  76
  77 enum binary_op {
  78     OP_EQ,         /* '='  */
  79     OP_NEQ,        /* '!=' */
  80     OP_LT,         /* '<'  */
  81     OP_LE,         /* '<=' */
  82     OP_GT,         /* '>'  */
  83     OP_GE,         /* '>=' */
  84     OP_PLUS,       /* '+'  */
  85     OP_MINUS,      /* '-'  */
  86     OP_STAR,       /* '*'  */
  87     OP_AND,        /* 'and' */
  88     OP_OR,         /* 'or' */
  89     OP_RE_MATCH,   /* '=~' */
  90     OP_RE_NOMATCH, /* '!~' */
  91     OP_UNION       /* '|' */
  92 };
  93
  94 struct pred {
  95     int               nexpr;
  96     struct expr     **exprs;
  97 };
  98
  99 enum axis {
 100     SELF,
 101     CHILD,
 102     DESCENDANT,
 103     DESCENDANT_OR_SELF,
 104     PARENT,
 105     ANCESTOR,
 106     ROOT,
 107     PRECEDING_SIBLING,
 108     FOLLOWING_SIBLING
 109 };
 110
 111 /* This array is indexed by enum axis */
 112 static const char *const axis_names[] = {
 113     "self",
 114     "child",
 115     "descendant",
 116     "descendant-or-self",
 117     "parent",
 118     "ancestor",
 119     "root",
 120     "preceding-sibling",
 121     "following-sibling"
 122 };
 123
 124 /* The characters that can follow a name in a location expression (aka path)
 125  * The parser will assume that name (path component) is finished when it
 126  * encounters any of these characters, unless they are escaped by preceding
 127  * them with a '\\'.
 128  *
 129  * See parse_name for the gory details
 130  */
 131 static const char name_follow[] = "][|/=()!,";
 132
 133 /* Doubly linked list of location steps. Besides the information from the
 134  * path expression, also contains information to iterate over a node set,
 135  * in particular, the context node CTX for the step, and the current node
 136  * CUR within that context.
 137  */
 138 struct step {
 139     struct step *next;
 140     enum axis    axis;
 141     char        *name;              /* NULL to match any name */
 142     struct pred *predicates;
 143 };
 144
 145 /* Initialise the root nodeset with the first step */
 146 static struct tree *step_root(struct step *step, struct tree *ctx,
 147                               struct tree *root_ctx);
 148 /* Iteration over the nodes on a step, ignoring the predicates */
 149 static struct tree *step_first(struct step *step, struct tree *ctx);
 150 static struct tree *step_next(struct step *step, struct tree *ctx,
 151                               struct tree *node);
 152
 153 struct pathx_symtab {
 154     struct pathx_symtab *next;
 155     char                *name;
 156     struct value        *value;
 157 };
 158
 159 struct pathx {
 160     struct state   *state;
 161     struct nodeset *nodeset;
 162     int             node;
 163     struct tree    *origin;
 164 };
 165
 166 #define L_BRACK '['
 167 #define R_BRACK ']'
 168
 169 struct locpath {
 170     struct step *steps;
 171 };
 172
 173 struct nodeset {
 174     struct tree **nodes;
 175     size_t        used;
 176     size_t        size;
 177 };
 178
 179 typedef uint32_t value_ind_t;
 180
 181 struct value {
 182     enum type tag;
 183     union {
 184         struct nodeset  *nodeset;     /* T_NODESET */
 185         int64_t          number;      /* T_NUMBER  */
 186         char            *string;      /* T_STRING  */
 187         bool             boolval;     /* T_BOOLEAN */
 188         struct regexp   *regexp;      /* T_REGEXP  */
 189     };
 190 };
 191
 192 struct expr {
 193     enum expr_tag tag;
 194     enum type     type;
 195     union {
 196         struct {                       /* E_FILTER */
 197             struct expr     *primary;
 198             struct pred     *predicates;
 199             struct locpath  *locpath;
 200         };
 201         struct {                       /* E_BINARY */
 202             enum binary_op op;
 203             struct expr *left;
 204             struct expr *right;
 205         };
 206         value_ind_t      value_ind;    /* E_VALUE */
 207         char            *ident;        /* E_VAR */
 208         struct {                       /* E_APP */
 209             const struct func *func;
 210             struct expr       **args;
 211             /* If fold is true, replace this function invocation
 212              * with its value after the first time we evaluate this
 213              * expression */
 214             bool              fold;
 215         };
 216     };
 217 };
 218
 219 struct locpath_trace {
 220     unsigned int      maxns;
 221     struct nodeset  **ns;
 222     struct locpath   *lp;
 223 };
 224
 225 /* Internal state of the evaluator/parser */
 226 struct state {
 227     pathx_errcode_t errcode;
 228     const char     *file;
 229     int             line;
 230     char           *errmsg;
 231
 232     const char     *txt;  /* Entire expression */
 233     const char     *pos;  /* Current position within TXT during parsing */
 234
 235     struct tree *ctx; /* The current node */
 236     uint            ctx_pos;
 237     uint            ctx_len;
 238
 239     struct tree *root_ctx; /* Root context for relative paths */
 240
 241     /* A table of all values. The table is dynamically reallocated, i.e.
 242      * pointers to struct value should not be used across calls that
 243      * might allocate new values
 244      *
 245      * value_pool[0] is always the boolean false, and value_pool[1]
 246      * always the boolean true
 247      */
 248     struct value  *value_pool;
 249     value_ind_t    value_pool_used;
 250     value_ind_t    value_pool_size;
 251     /* Stack of values (as indices into value_pool), with bottom of
 252        stack in values[0] */
 253     value_ind_t   *values;
 254     size_t         values_used;
 255     size_t         values_size;
 256     /* Stack of expressions, with bottom of stack in exprs[0] */
 257     struct expr  **exprs;
 258     size_t         exprs_used;
 259     size_t         exprs_size;
 260     /* Trace of a locpath evaluation, needed by pathx_expand_tree.
 261        Generally NULL, unless a trace is needed.
 262      */
 263     struct locpath_trace *locpath_trace;
 264     /* Symbol table for variable lookups */
 265     struct pathx_symtab *symtab;
 266     /* Error structure, used to communicate errors to struct augeas;
 267      * we never own this structure, and therefore never free it */
 268     struct error        *error;
 269 };
 270
 271 /* We consider NULL and the empty string to be equal */
 272 ATTRIBUTE_PURE
 273 static inline int streqx(const char *s1, const char *s2) {
 274     if (s1 == NULL)
 275         return (s2 == NULL || strlen(s2) == 0);
 276     if (s2 == NULL)
 277         return strlen(s1) == 0;
 278     return STREQ(s1, s2);
 279 }
 280
 281 /* Functions */
 282
 283 typedef void (*func_impl_t)(struct state *state, int nargs);
 284
 285 struct func {
 286     const char      *name;
 287     unsigned int     arity;
 288     enum type        type;
 289     bool             pure;      /* Result only depends on args */
 290     const enum type *arg_types;
 291     func_impl_t      impl;
 292 };
 293
 294 static void func_last(struct state *state, int nargs);
 295 static void func_position(struct state *state, int nargs);
 296 static void func_count(struct state *state, int nargs);
 297 static void func_label(struct state *state, int nargs);
 298 static void func_regexp(struct state *state, int nargs);
 299 static void func_regexp_flag(struct state *state, int nargs);
 300 static void func_glob(struct state *state, int nargs);
 301 static void func_int(struct state *state, int nargs);
 302 static void func_not(struct state *state, int nargs);
 303
 304 static const enum type arg_types_nodeset[] = { T_NODESET };
 305 static const enum type arg_types_string[] = { T_STRING };
 306 static const enum type arg_types_bool[] = { T_BOOLEAN };
 307 static const enum type arg_types_string_string[] = { T_STRING, T_STRING };
 308 static const enum type arg_types_nodeset_string[] = { T_NODESET, T_STRING };
 309
 310 static const struct func builtin_funcs[] = {
 311     { .name = "last", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
 312       .impl = func_last, .pure = false },
 313     { .name = "position", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
 314       .impl = func_position, .pure = false },
 315     { .name = "label", .arity = 0, .type = T_STRING, .arg_types = NULL,
 316       .impl = func_label, .pure = false },
 317     { .name = "count", .arity = 1, .type = T_NUMBER,
 318       .arg_types = arg_types_nodeset,
 319       .impl = func_count, .pure = false },
 320     { .name = "regexp", .arity = 1, .type = T_REGEXP,
 321       .arg_types = arg_types_string,
 322       .impl = func_regexp, .pure = true },
 323     { .name = "regexp", .arity = 1, .type = T_REGEXP,
 324       .arg_types = arg_types_nodeset,
 325       .impl = func_regexp, .pure = true },
 326     { .name = "regexp", .arity = 2, .type = T_REGEXP,
 327       .arg_types = arg_types_string_string,
 328       .impl = func_regexp_flag, .pure = true },
 329     { .name = "regexp", .arity = 2, .type = T_REGEXP,
 330       .arg_types = arg_types_nodeset_string,
 331       .impl = func_regexp_flag, .pure = true },
 332     { .name = "glob", .arity = 1, .type = T_REGEXP,
 333       .arg_types = arg_types_string,
 334       .impl = func_glob, .pure = true },
 335     { .name = "glob", .arity = 1, .type = T_REGEXP,
 336       .arg_types = arg_types_nodeset,
 337       .impl = func_glob, .pure = true },
 338     { .name = "int", .arity = 1, .type = T_NUMBER,
 339       .arg_types = arg_types_string, .impl = func_int, .pure = false },
 340     { .name = "int", .arity = 1, .type = T_NUMBER,
 341       .arg_types = arg_types_nodeset, .impl = func_int, .pure = false },
 342     { .name = "int", .arity = 1, .type = T_NUMBER,
 343       .arg_types = arg_types_bool, .impl = func_int, .pure = false },
 344     { .name = "not", .arity = 1, .type = T_BOOLEAN,
 345       .arg_types = arg_types_bool, .impl = func_not, .pure = true }
 346 };
 347
 348 #define RET_ON_ERROR                                                    \
 349     if (state->errcode != PATHX_NOERROR) return
 350
 351 #define RET0_ON_ERROR                                                   \
 352     if (state->errcode != PATHX_NOERROR) return 0
 353
 354 #define STATE_ERROR(state, err)                                         \
 355     do {                                                                \
 356         state->errcode = err;                                           \
 357         state->file = __FILE__;                                         \
 358         state->line = __LINE__;                                         \
 359     } while (0)
 360
 361 #define HAS_ERROR(state) (state->errcode != PATHX_NOERROR)
 362
 363 #define STATE_ENOMEM STATE_ERROR(state, PATHX_ENOMEM)
 364
 365 /*
 366  * Free the various data structures
 367  */
 368
 369 static void free_expr(struct expr *expr);
 370
 371 static void free_pred(struct pred *pred) {
 372     if (pred == NULL)
 373         return;
 374
 375     for (int i=0; i < pred->nexpr; i++) {
 376         free_expr(pred->exprs[i]);
 377     }
 378     free(pred->exprs);
 379     free(pred);
 380 }
 381
 382 static void free_step(struct step *step) {
 383     while (step != NULL) {
 384         struct step *del = step;
 385         step = del->next;
 386         free(del->name);
 387         free_pred(del->predicates);
 388         free(del);
 389     }
 390 }
 391
 392 static void free_locpath(struct locpath *locpath) {
 393     if (locpath == NULL)
 394         return;
 395     while (locpath->steps != NULL) {
 396         struct step *step = locpath->steps;
 397         locpath->steps = step->next;
 398         free(step->name);
 399         free_pred(step->predicates);
 400         free(step);
 401     }
 402     free(locpath);
 403 }
 404
 405 static void free_expr(struct expr *expr) {
 406     if (expr == NULL)
 407         return;
 408     switch (expr->tag) {
 409     case E_FILTER:
 410         free_expr(expr->primary);
 411         free_pred(expr->predicates);
 412         free_locpath(expr->locpath);
 413         break;
 414     case E_BINARY:
 415         free_expr(expr->left);
 416         free_expr(expr->right);
 417         break;
 418     case E_VALUE:
 419         break;
 420     case E_VAR:
 421         free(expr->ident);
 422         break;
 423     case E_APP:
 424         for (int i=0; i < expr->func->arity; i++)
 425             free_expr(expr->args[i]);
 426         free(expr->args);
 427         break;
 428     default:
 429         assert(0);
 430     }
 431     free(expr);
 432 }
 433
 434 static void free_nodeset(struct nodeset *ns) {
 435     if (ns != NULL) {
 436         free(ns->nodes);
 437         free(ns);
 438     }
 439 }
 440
 441 /* Free all objects used by VALUE, but not VALUE itself */
 442 static void release_value(struct value *v) {
 443     if (v == NULL)
 444         return;
 445
 446     switch (v->tag) {
 447     case T_NODESET:
 448         free_nodeset(v->nodeset);
 449         break;
 450     case T_STRING:
 451         free(v->string);
 452         break;
 453     case T_BOOLEAN:
 454     case T_NUMBER:
 455         break;
 456     case T_REGEXP:
 457         unref(v->regexp, regexp);
 458         break;
 459     default:
 460         assert(0);
 461     }
 462 }
 463
 464 static void free_state(struct state *state) {
 465     if (state == NULL)
 466         return;
 467
 468     for(int i=0; i < state->exprs_used; i++)
 469         free_expr(state->exprs[i]);
 470     free(state->exprs);
 471
 472     for(int i=0; i < state->value_pool_used; i++)
 473         release_value(state->value_pool + i);
 474     free(state->value_pool);
 475     free(state->values);
 476     free(state);
 477 }
 478
 479 void free_pathx(struct pathx *pathx) {
 480     if (pathx == NULL)
 481         return;
 482     free_state(pathx->state);
 483     free(pathx);
 484 }
 485
 486 /*
 487  * Nodeset helpers
 488  */
 489 static struct nodeset *make_nodeset(struct state *state) {
 490     struct nodeset *result;
 491     if (ALLOC(result) < 0)
 492         STATE_ENOMEM;
 493     return result;
 494 }
 495
 496 /* Add NODE to NS if it is not in NS yet. This relies on the flag
 497  * NODE->ADDED and care must be taken that NS_CLEAR_ADDED is called on NS
 498  * as soon as we are done adding nodes to it.
 499  */
 500 static void ns_add(struct nodeset *ns, struct tree *node,
 501                    struct state *state) {
 502     if (node->added)
 503         return;
 504     if (ns->used >= ns->size) {
 505         size_t size = 2 * ns->size;
 506         if (size < 10) size = 10;
 507         if (REALLOC_N(ns->nodes, size) < 0)
 508             STATE_ENOMEM;
 509         ns->size = size;
 510     }
 511     ns->nodes[ns->used] = node;
 512     node->added = 1;
 513     ns->used += 1;
 514 }
 515
 516 static void ns_clear_added(struct nodeset *ns) {
 517     for (int i=0; i < ns->used; i++)
 518         ns->nodes[i]->added = 0;
 519 }
 520
 521 static struct nodeset *
 522 clone_nodeset(struct nodeset *ns, struct state *state)
 523 {
 524     struct nodeset *clone;
 525     if (ALLOC(clone) < 0) {
 526         STATE_ENOMEM;
 527         return NULL;
 528     }
 529     if (ALLOC_N(clone->nodes, ns->used) < 0) {
 530         free(clone);
 531         STATE_ENOMEM;
 532         return NULL;
 533     }
 534     clone->used = ns->used;
 535     clone->size = ns->used;
 536     for (int i=0; i < ns->used; i++)
 537         clone->nodes[i] = ns->nodes[i];
 538     return clone;
 539 }
 540
 541 /*
 542  * Handling values
 543  */
 544 static value_ind_t make_value(enum type tag, struct state *state) {
 545     assert(tag != T_BOOLEAN);
 546
 547     if (state->value_pool_used >= state->value_pool_size) {
 548         value_ind_t new_size = 2*state->value_pool_size;
 549         if (new_size <= state->value_pool_size) {
 550             STATE_ENOMEM;
 551             return 0;
 552         }
 553         if (REALLOC_N(state->value_pool, new_size) < 0) {
 554             STATE_ENOMEM;
 555             return 0;
 556         }
 557         state->value_pool_size = new_size;
 558     }
 559     state->value_pool[state->value_pool_used].tag = tag;
 560     state->value_pool[state->value_pool_used].nodeset = NULL;
 561     return state->value_pool_used++;
 562 }
 563
 564 static value_ind_t clone_value(struct value *v, struct state *state) {
 565     value_ind_t vind = make_value(v->tag, state);
 566     RET0_ON_ERROR;
 567     struct value *clone = state->value_pool + vind;
 568
 569     switch (v->tag) {
 570     case T_NODESET:
 571         clone->nodeset = clone_nodeset(v->nodeset, state);
 572         break;
 573     case T_NUMBER:
 574         clone->number = v->number;
 575         break;
 576     case T_STRING:
 577         clone->string = strdup(v->string);
 578         if (clone->string == NULL) {
 579             STATE_ENOMEM;
 580         }
 581         break;
 582     case T_BOOLEAN:
 583         clone->boolval = v->boolval;
 584         break;
 585     case T_REGEXP:
 586         clone->regexp = ref(v->regexp);
 587         break;
 588     default:
 589         assert(0);
 590     }
 591     return vind;
 592 }
 593
 594 static value_ind_t pop_value_ind(struct state *state) {
 595     if (state->values_used > 0) {
 596         state->values_used -= 1;
 597         return state->values[state->values_used];
 598     } else {
 599         STATE_ERROR(state, PATHX_EINTERNAL);
 600         assert(0);
 601         return 0;
 602     }
 603 }
 604
 605 static struct value *pop_value(struct state *state) {
 606     value_ind_t vind = pop_value_ind(state);
 607     if (HAS_ERROR(state))
 608         return NULL;
 609     return state->value_pool + vind;
 610 }
 611
 612 static void push_value(value_ind_t vind, struct state *state) {
 613     if (state->values_used >= state->values_size) {
 614         size_t new_size = 2*state->values_size;
 615         if (new_size == 0) new_size = 8;
 616         if (REALLOC_N(state->values, new_size) < 0) {
 617             STATE_ENOMEM;
 618             return;
 619         }
 620         state->values_size = new_size;
 621     }
 622     state->values[state->values_used++] = vind;
 623 }
 624
 625 static void push_boolean_value(int b, struct state *state) {
 626     push_value(b != 0, state);
 627 }
 628
 629 ATTRIBUTE_PURE
 630 static struct value *expr_value(struct expr *expr, struct state *state) {
 631     return state->value_pool + expr->value_ind;
 632 }
 633
 634 /*************************************************************************
 635  * Evaluation
 636  ************************************************************************/
 637 static void eval_expr(struct expr *expr, struct state *state);
 638
 639
 640 #define ensure_arity(min, max)                                          \
 641     if (nargs < min || nargs > max) {                                   \
 642         STATE_ERROR(state, PATHX_EINTERNAL);                            \
 643         return;                                                         \
 644     }
 645
 646 static void func_last(struct state *state, int nargs) {
 647     ensure_arity(0, 0);
 648     value_ind_t vind = make_value(T_NUMBER, state);
 649     RET_ON_ERROR;
 650
 651     state->value_pool[vind].number = state->ctx_len;
 652     push_value(vind, state);
 653 }
 654
 655 static void func_position(struct state *state, int nargs) {
 656     ensure_arity(0, 0);
 657     value_ind_t vind = make_value(T_NUMBER, state);
 658     RET_ON_ERROR;
 659
 660     state->value_pool[vind].number = state->ctx_pos;
 661     push_value(vind, state);
 662 }
 663
 664 static void func_count(struct state *state, int nargs) {
 665     ensure_arity(1, 1);
 666     value_ind_t vind = make_value(T_NUMBER, state);
 667     RET_ON_ERROR;
 668
 669     struct value *ns = pop_value(state);
 670     state->value_pool[vind].number = ns->nodeset->used;
 671     push_value(vind, state);
 672 }
 673
 674 static void func_label(struct state *state, int nargs) {
 675     ensure_arity(0, 0);
 676     value_ind_t vind = make_value(T_STRING, state);
 677     char *s;
 678
 679     RET_ON_ERROR;
 680
 681     if (state->ctx->label)
 682         s = strdup(state->ctx->label);
 683     else
 684         s = strdup("");
 685     if (s == NULL) {
 686         STATE_ENOMEM;
 687         return;
 688     }
 689     state->value_pool[vind].string = s;
 690     push_value(vind, state);
 691 }
 692
 693 static void func_int(struct state *state, int nargs) {
 694     ensure_arity(1, 1);
 695     value_ind_t vind = make_value(T_NUMBER, state);
 696     int64_t i = -1;
 697     RET_ON_ERROR;
 698
 699     struct value *v = pop_value(state);
 700     if (v->tag == T_BOOLEAN) {
 701         i = v->boolval;
 702     } else {
 703         const char *s = NULL;
 704         if (v->tag == T_STRING) {
 705             s = v->string;
 706         } else {
 707             /* T_NODESET */
 708             if (v->nodeset->used != 1) {
 709                 STATE_ERROR(state, PATHX_EMMATCH);
 710                 return;
 711             }
 712             s = v->nodeset->nodes[0]->value;
 713         }
 714         if (s != NULL) {
 715             int r;
 716             r = xstrtoint64(s, 10, &i);
 717             if (r < 0) {
 718                 STATE_ERROR(state, PATHX_ENUMBER);
 719                 return;
 720             }
 721         }
 722     }
 723     state->value_pool[vind].number = i;
 724     push_value(vind, state);
 725 }
 726
 727 static void func_not(struct state *state, int nargs) {
 728     ensure_arity(1, 1);
 729     RET_ON_ERROR;
 730
 731     struct value *v = pop_value(state);
 732     if (v->tag == T_BOOLEAN) {
 733         push_boolean_value(! v->boolval, state);
 734     }
 735 }
 736
 737 static struct regexp *
 738 nodeset_as_regexp(struct info *info, struct nodeset *ns, int glob, int nocase) {
 739     struct regexp *result = NULL;
 740     struct regexp **rx = NULL;
 741     int used = 0;
 742
 743     for (int i = 0; i < ns->used; i++) {
 744         if (ns->nodes[i]->value != NULL)
 745             used += 1;
 746     }
 747
 748     if (used == 0) {
 749         /* If the nodeset is empty, make sure we produce a regexp
 750          * that never matches anything */
 751         result = make_regexp_unescape(info, "[^\001-\7ff]", nocase);
 752     } else {
 753         if (ALLOC_N(rx, ns->used) < 0)
 754             goto error;
 755         for (int i=0; i < ns->used; i++) {
 756             if (ns->nodes[i]->value == NULL)
 757                 continue;
 758
 759             if (glob)
 760                 rx[i] = make_regexp_from_glob(info, ns->nodes[i]->value);
 761             else
 762                 rx[i] = make_regexp_unescape(info, ns->nodes[i]->value, 0);
 763             if (rx[i] == NULL)
 764                 goto error;
 765         }
 766         result = regexp_union_n(info, ns->used, rx);
 767     }
 768
 769  error:
 770     if (rx != NULL) {
 771         for (int i=0; i < ns->used; i++)
 772             unref(rx[i], regexp);
 773         free(rx);
 774     }
 775     return result;
 776 }
 777
 778 static void func_regexp_or_glob(struct state *state, int glob, int nocase) {
 779     value_ind_t vind = make_value(T_REGEXP, state);
 780     int r;
 781
 782     RET_ON_ERROR;
 783
 784     struct value *v = pop_value(state);
 785     struct regexp *rx = NULL;
 786
 787     if (v->tag == T_STRING) {
 788         if (glob)
 789             rx = make_regexp_from_glob(state->error->info, v->string);
 790         else
 791             rx = make_regexp_unescape(state->error->info, v->string, nocase);
 792     } else if (v->tag == T_NODESET) {
 793         rx = nodeset_as_regexp(state->error->info, v->nodeset, glob, nocase);
 794     } else {
 795         assert(0);
 796     }
 797
 798     if (rx == NULL) {
 799         STATE_ENOMEM;
 800         return;
 801     }
 802
 803     state->value_pool[vind].regexp = rx;
 804     r = regexp_compile(rx);
 805     if (r < 0) {
 806         const char *msg;
 807         regexp_check(rx, &msg);
 808         state->errmsg = strdup(msg);
 809         STATE_ERROR(state, PATHX_EREGEXP);
 810         return;
 811     }
 812     push_value(vind, state);
 813 }
 814
 815 static void func_regexp(struct state *state, int nargs) {
 816     ensure_arity(1, 1);
 817     func_regexp_or_glob(state, 0, 0);
 818 }
 819
 820 static void func_regexp_flag(struct state *state, int nargs) {
 821     ensure_arity(2, 2);
 822     int nocase = 0;
 823     struct value *f = pop_value(state);
 824
 825     if (STREQ("i", f->string))
 826         nocase = 1;
 827     else
 828         STATE_ERROR(state, PATHX_EREGEXPFLAG);
 829
 830     func_regexp_or_glob(state, 0, nocase);
 831 }
 832
 833 static void func_glob(struct state *state, int nargs) {
 834     ensure_arity(1, 1);
 835     func_regexp_or_glob(state, 1, 0);
 836 }
 837
 838 static bool coerce_to_bool(struct value *v) {
 839     switch (v->tag) {
 840     case T_NODESET:
 841         return v->nodeset->used > 0;
 842         break;
 843     case T_BOOLEAN:
 844         return v->boolval;
 845         break;
 846     case T_NUMBER:
 847         return v->number > 0;
 848         break;
 849     case T_STRING:
 850         return strlen(v->string) > 0;
 851         break;
 852     case T_REGEXP:
 853         return true;
 854     default:
 855         assert(0);
 856         return false;
 857     }
 858 }
 859
 860 static int calc_eq_nodeset_nodeset(struct nodeset *ns1, struct nodeset *ns2,
 861                                    int neq) {
 862     for (int i1=0; i1 < ns1->used; i1++) {
 863         struct tree *t1 = ns1->nodes[i1];
 864         for (int i2=0; i2 < ns2->used; i2++) {
 865             struct tree *t2 = ns2->nodes[i2];
 866             if (neq) {
 867                 if (!streqx(t1->value, t2->value))
 868                     return 1;
 869             } else {
 870                 if (streqx(t1->value, t2->value))
 871                     return 1;
 872             }
 873         }
 874     }
 875     return 0;
 876 }
 877
 878 static int calc_eq_nodeset_string(struct nodeset *ns, const char *s,
 879                                   int neq) {
 880     for (int i=0; i < ns->used; i++) {
 881         struct tree *t = ns->nodes[i];
 882         if (neq) {
 883             if (!streqx(t->value, s))
 884                 return 1;
 885         } else {
 886             if (streqx(t->value, s))
 887                 return 1;
 888         }
 889     }
 890     return 0;
 891 }
 892
 893 static void eval_eq(struct state *state, int neq) {
 894     struct value *r = pop_value(state);
 895     struct value *l = pop_value(state);
 896     int res;
 897
 898     if (l->tag == T_NODESET && r->tag == T_NODESET) {
 899         res = calc_eq_nodeset_nodeset(l->nodeset, r->nodeset, neq);
 900     } else if (l->tag == T_NODESET) {
 901         res = calc_eq_nodeset_string(l->nodeset, r->string, neq);
 902     } else if (r->tag == T_NODESET) {
 903         res = calc_eq_nodeset_string(r->nodeset, l->string, neq);
 904     } else if (l->tag == T_NUMBER && r->tag == T_NUMBER) {
 905         if (neq)
 906             res = (l->number != r->number);
 907         else
 908             res = (l->number == r->number);
 909     } else {
 910         assert(l->tag == T_STRING);
 911         assert(r->tag == T_STRING);
 912         res = streqx(l->string, r->string);
 913         if (neq)
 914             res = !res;
 915     }
 916     RET_ON_ERROR;
 917
 918     push_boolean_value(res, state);
 919 }
 920
 921 static void eval_arith(struct state *state, enum binary_op op) {
 922     value_ind_t vind = make_value(T_NUMBER, state);
 923     struct value *r = pop_value(state);
 924     struct value *l = pop_value(state);
 925     int res;
 926
 927     assert(l->tag == T_NUMBER);
 928     assert(r->tag == T_NUMBER);
 929
 930     RET_ON_ERROR;
 931
 932     if (op == OP_PLUS)
 933         res = l->number + r->number;
 934     else if (op == OP_MINUS)
 935         res = l->number - r->number;
 936     else if (op == OP_STAR)
 937         res = l->number * r->number;
 938     else
 939         assert(0);
 940
 941     state->value_pool[vind].number = res;
 942     push_value(vind, state);
 943 }
 944
 945 static void eval_rel(struct state *state, bool greater, bool equal) {
 946     struct value *r, *l;
 947     int res;
 948
 949     /* We always check l < r or l <= r */
 950     if (greater) {
 951         l = pop_value(state);
 952         r = pop_value(state);
 953     } else {
 954         r = pop_value(state);
 955         l = pop_value(state);
 956     }
 957     if (l->tag == T_NUMBER) {
 958         if (equal)
 959             res = (l->number <= r->number);
 960         else
 961             res = (l->number < r->number);
 962     } else if (l->tag == T_STRING) {
 963         int cmp = strcmp(l->string, r->string);
 964         if (equal)
 965             res = cmp <= 0;
 966         else
 967             res = cmp < 0;
 968     } else {
 969         assert(0);
 970     }
 971
 972     push_boolean_value(res, state);
 973 }
 974
 975 static void eval_and_or(struct state *state, enum binary_op op) {
 976     struct value *r = pop_value(state);
 977     struct value *l = pop_value(state);
 978     bool left = coerce_to_bool(l);
 979     bool right = coerce_to_bool(r);
 980
 981
 982     if (op == OP_AND)
 983         push_boolean_value(left && right, state);
 984     else
 985         push_boolean_value(left || right, state);
 986 }
 987
 988 static bool eval_re_match_str(struct state *state, struct regexp *rx,
 989                               const char *str) {
 990     int r;
 991
 992     if (str == NULL)
 993         str = "";
 994
 995     r = regexp_match(rx, str, strlen(str), 0, NULL);
 996     if (r == -2) {
 997         STATE_ERROR(state, PATHX_EINTERNAL);
 998     } else if (r == -3) {
 999         /* We should never get this far; func_regexp should catch
1000          * invalid regexps */
1001         assert(false);
1002     }
1003     return r == strlen(str);
1004 }
1005
1006 static void eval_union(struct state *state) {
1007     value_ind_t vind = make_value(T_NODESET, state);
1008     struct value *r = pop_value(state);
1009     struct value *l = pop_value(state);
1010     struct nodeset *res = NULL;
1011
1012     assert(l->tag == T_NODESET);
1013     assert(r->tag == T_NODESET);
1014
1015     RET_ON_ERROR;
1016
1017     res = clone_nodeset(l->nodeset, state);
1018     RET_ON_ERROR;
1019     for (int i=0; i < r->nodeset->used; i++) {
1020         ns_add(res, r->nodeset->nodes[i], state);
1021         if (HAS_ERROR(state))
1022             goto error;
1023     }
1024     state->value_pool[vind].nodeset = res;
1025     push_value(vind, state);
1026  error:
1027     ns_clear_added(res);
1028 }
1029
1030 static void eval_concat_string(struct state *state) {
1031     value_ind_t vind = make_value(T_STRING, state);
1032     struct value *r = pop_value(state);
1033     struct value *l = pop_value(state);
1034     char *res = NULL;
1035
1036     RET_ON_ERROR;
1037
1038     if (ALLOC_N(res, strlen(l->string) + strlen(r->string) + 1) < 0) {
1039         STATE_ENOMEM;
1040         return;
1041     }
1042     strcpy(res, l->string);
1043     strcat(res, r->string);
1044     state->value_pool[vind].string = res;
1045     push_value(vind, state);
1046 }
1047
1048 static void eval_concat_regexp(struct state *state) {
1049     value_ind_t vind = make_value(T_REGEXP, state);
1050     struct value *r = pop_value(state);
1051     struct value *l = pop_value(state);
1052     struct regexp *rx = NULL;
1053
1054     RET_ON_ERROR;
1055
1056     rx = regexp_concat(state->error->info, l->regexp, r->regexp);
1057     if (rx == NULL) {
1058         STATE_ENOMEM;
1059         return;
1060     }
1061
1062     state->value_pool[vind].regexp = rx;
1063     push_value(vind, state);
1064 }
1065
1066 static void eval_re_match(struct state *state, enum binary_op op) {
1067     struct value *rx  = pop_value(state);
1068     struct value *v = pop_value(state);
1069
1070     bool result = false;
1071
1072     if (v->tag == T_STRING) {
1073         result = eval_re_match_str(state, rx->regexp, v->string);
1074         RET_ON_ERROR;
1075     } else if (v->tag == T_NODESET) {
1076         for (int i=0; i < v->nodeset->used && result == false; i++) {
1077             struct tree *t = v->nodeset->nodes[i];
1078             result = eval_re_match_str(state, rx->regexp, t->value);
1079             RET_ON_ERROR;
1080         }
1081     }
1082     if (op == OP_RE_NOMATCH)
1083         result = !result;
1084     push_boolean_value(result, state);
1085 }
1086
1087 static void eval_binary(struct expr *expr, struct state *state) {
1088     eval_expr(expr->left, state);
1089     eval_expr(expr->right, state);
1090     RET_ON_ERROR;
1091
1092     switch (expr->op) {
1093     case OP_EQ:
1094         eval_eq(state, 0);
1095         break;
1096     case OP_NEQ:
1097         eval_eq(state, 1);
1098         break;
1099     case OP_LT:
1100         eval_rel(state, false, false);
1101         break;
1102     case OP_LE:
1103         eval_rel(state, false, true);
1104         break;
1105     case OP_GT:
1106         eval_rel(state, true, false);
1107         break;
1108     case OP_GE:
1109         eval_rel(state, true, true);
1110         break;
1111     case OP_PLUS:
1112         if (expr->type == T_NUMBER)
1113             eval_arith(state, expr->op);
1114         else if (expr->type == T_STRING)
1115             eval_concat_string(state);
1116         else if (expr->type == T_REGEXP)
1117             eval_concat_regexp(state);
1118         break;
1119     case OP_MINUS:
1120     case OP_STAR:
1121         eval_arith(state, expr->op);
1122         break;
1123     case OP_AND:
1124     case OP_OR:
1125         eval_and_or(state, expr->op);
1126         break;
1127     case OP_UNION:
1128         eval_union(state);
1129         break;
1130     case OP_RE_MATCH:
1131     case OP_RE_NOMATCH:
1132         eval_re_match(state, expr->op);
1133         break;
1134     default:
1135         assert(0);
1136     }
1137 }
1138
1139 static void eval_app(struct expr *expr, struct state *state) {
1140     assert(expr->tag == E_APP);
1141
1142     for (int i=0; i < expr->func->arity; i++) {
1143         eval_expr(expr->args[i], state);
1144         RET_ON_ERROR;
1145     }
1146     expr->func->impl(state, expr->func->arity);
1147 }
1148
1149 static bool eval_pred(struct expr *expr, struct state *state) {
1150     eval_expr(expr, state);
1151     RET0_ON_ERROR;
1152
1153     struct value *v = pop_value(state);
1154     switch(v->tag) {
1155     case T_BOOLEAN:
1156         return v->boolval;
1157     case T_NUMBER:
1158         return (state->ctx_pos == v->number);
1159     case T_NODESET:
1160         return v->nodeset->used > 0;
1161     case T_STRING:
1162         return streqv(state->ctx->value, v->string);
1163     default:
1164         assert(0);
1165         return false;
1166     }
1167 }
1168
1169 /* Remove COUNT successive entries from NS. The first entry to remove is at
1170    IND */
1171 static void ns_remove(struct nodeset *ns, int ind, int count) {
1172     if (count < 1)
1173         return;
1174     memmove(ns->nodes + ind, ns->nodes + ind+count,
1175             sizeof(ns->nodes[0]) * (ns->used - (ind+count)));
1176     ns->used -= count;
1177 }
1178
1179 /*
1180  * Remove all nodes from NS for which one of PRED is false
1181  */
1182 static void ns_filter(struct nodeset *ns, struct pred *predicates,
1183                       struct state *state) {
1184     if (predicates == NULL)
1185         return;
1186
1187     struct tree *old_ctx = state->ctx;
1188     uint old_ctx_len = state->ctx_len;
1189     uint old_ctx_pos = state->ctx_pos;
1190
1191     for (int p=0; p < predicates->nexpr; p++) {
1192         int first_bad = -1;  /* The index of the first non-matching node */
1193         state->ctx_len = ns->used;
1194         state->ctx_pos = 1;
1195         for (int i=0; i < ns->used; state->ctx_pos++) {
1196             state->ctx = ns->nodes[i];
1197             bool match = eval_pred(predicates->exprs[p], state);
1198             RET_ON_ERROR;
1199             /* We remove non-matching nodes from NS in batches; this logic
1200              * makes sure that we only call ns_remove at the end of a run
1201              * of non-matching nodes
1202              */
1203             if (match) {
1204                 if (first_bad >= 0) {
1205                     ns_remove(ns, first_bad, i - first_bad);
1206                     i = first_bad + 1;
1207                 } else {
1208                     i += 1;
1209                 }
1210                 first_bad = -1;
1211             } else {
1212                 if (first_bad == -1)
1213                     first_bad = i;
1214                 i += 1;
1215             }
1216         }
1217         if (first_bad >= 0) {
1218             ns_remove(ns, first_bad, ns->used - first_bad);
1219         }
1220     }
1221
1222     state->ctx = old_ctx;
1223     state->ctx_pos = old_ctx_pos;
1224     state->ctx_len = old_ctx_len;
1225 }
1226
1227 /* Return true if PRED is solely the predicate '[n]' as in 'foo[17]' */
1228 static bool position_pred(struct pred *pred) {
1229     return pred != NULL &&
1230         pred->nexpr == 1 &&
1231         pred->exprs[0]->tag == E_VALUE &&
1232         pred->exprs[0]->type == T_NUMBER;
1233 }
1234
1235 /* Return the tree node at the position implied by STEP->PREDICATES. It is
1236    assumed and required that STEP->PREDICATES is actually a
1237    POSITION_PRED.
1238
1239    This method hand-optimizes the important case of a path expression like
1240    'service[42]'
1241 */
1242 static struct tree *position_filter(struct nodeset *ns,
1243                                     struct step *step,
1244                                     struct state *state) {
1245     int value_ind = step->predicates->exprs[0]->value_ind;
1246     int number = state->value_pool[value_ind].number;
1247
1248     int pos = 1;
1249     for (int i=0; i < ns->used; i++) {
1250         for (struct tree *node = step_first(step, ns->nodes[i]);
1251              node != NULL;
1252              node = step_next(step, ns->nodes[i], node), pos++) {
1253             if (pos == number)
1254                 return node;
1255         }
1256     }
1257
1258     return NULL;
1259 }
1260
1261 /* Return an array of nodesets, one for each step in the locpath.
1262  *
1263  * On return, (*NS)[0] will contain state->ctx, and (*NS)[*MAXNS] will
1264  * contain the nodes that matched the entire locpath
1265  */
1266 static void ns_from_locpath(struct locpath *lp, uint *maxns,
1267                             struct nodeset ***ns,
1268                             const struct nodeset *root,
1269                             struct state *state) {
1270     struct tree *old_ctx = state->ctx;
1271     *maxns = 0;
1272
1273     ensure(lp != NULL, state);
1274
1275     *ns = NULL;
1276     list_for_each(step, lp->steps)
1277         *maxns += 1;
1278     if (ALLOC_N(*ns, *maxns+1) < 0) {
1279         STATE_ERROR(state, PATHX_ENOMEM);
1280         goto error;
1281     }
1282     for (int i=0; i <= *maxns; i++) {
1283         (*ns)[i] = make_nodeset(state);
1284         if (HAS_ERROR(state))
1285             goto error;
1286     }
1287
1288     if (root == NULL) {
1289         struct step *first_step = NULL;
1290         first_step = lp->steps;
1291
1292         struct tree *root_tree;
1293         root_tree = step_root(first_step, state->ctx, state->root_ctx);
1294         ns_add((*ns)[0], root_tree, state);
1295         ns_clear_added((*ns)[0]);
1296     } else {
1297         for (int i=0; i < root->used; i++)
1298             ns_add((*ns)[0], root->nodes[i], state);
1299         ns_clear_added((*ns)[0]);
1300     }
1301
1302     if (HAS_ERROR(state))
1303         goto error;
1304
1305     uint cur_ns = 0;
1306     list_for_each(step, lp->steps) {
1307         struct nodeset *work = (*ns)[cur_ns];
1308         struct nodeset *next = (*ns)[cur_ns + 1];
1309         if (position_pred(step->predicates)) {
1310             struct tree *node = position_filter(work, step, state);
1311             if (node) {
1312                 ns_add(next, node, state);
1313                 ns_clear_added(next);
1314             }
1315         } else {
1316             for (int i=0; i < work->used; i++) {
1317                 for (struct tree *node = step_first(step, work->nodes[i]);
1318                      node != NULL;
1319                      node = step_next(step, work->nodes[i], node)) {
1320                     ns_add(next, node, state);
1321                 }
1322             }
1323             ns_clear_added(next);
1324             ns_filter(next, step->predicates, state);
1325             if (HAS_ERROR(state))
1326                 goto error;
1327         }
1328         cur_ns += 1;
1329     }
1330
1331     state->ctx = old_ctx;
1332     return;
1333  error:
1334     if (*ns != NULL) {
1335         for (int i=0; i <= *maxns; i++)
1336             free_nodeset((*ns)[i]);
1337         FREE(*ns);
1338     }
1339     state->ctx = old_ctx;
1340     return;
1341 }
1342
1343 static void eval_filter(struct expr *expr, struct state *state) {
1344     struct locpath *lp = expr->locpath;
1345     struct nodeset **ns = NULL;
1346     struct locpath_trace *lpt = state->locpath_trace;
1347     uint maxns;
1348
1349     state->locpath_trace = NULL;
1350     if (expr->primary == NULL) {
1351         ns_from_locpath(lp, &maxns, &ns, NULL, state);
1352     } else {
1353         eval_expr(expr->primary, state);
1354         RET_ON_ERROR;
1355         value_ind_t primary_ind = pop_value_ind(state);
1356         struct value *primary = state->value_pool + primary_ind;
1357         assert(primary->tag == T_NODESET);
1358         ns_filter(primary->nodeset, expr->predicates, state);
1359         /* Evaluating predicates might have reallocated the value_pool */
1360         primary = state->value_pool + primary_ind;
1361         ns_from_locpath(lp, &maxns, &ns, primary->nodeset, state);
1362     }
1363     RET_ON_ERROR;
1364
1365     value_ind_t vind = make_value(T_NODESET, state);
1366     RET_ON_ERROR;
1367     state->value_pool[vind].nodeset = ns[maxns];
1368     push_value(vind, state);
1369
1370     if (lpt != NULL) {
1371         assert(lpt->ns == NULL);
1372         assert(lpt->lp == NULL);
1373         lpt->maxns = maxns;
1374         lpt->ns = ns;
1375         lpt->lp = lp;
1376         state->locpath_trace = lpt;
1377     } else {
1378         for (int i=0; i < maxns; i++)
1379             free_nodeset(ns[i]);
1380         FREE(ns);
1381     }
1382 }
1383
1384 static struct value *lookup_var(const char *ident,
1385                                 const struct pathx_symtab *symtab) {
1386     list_for_each(tab, symtab) {
1387         if (STREQ(ident, tab->name))
1388             return tab->value;
1389     }
1390     return NULL;
1391 }
1392
1393 static void eval_var(struct expr *expr, struct state *state) {
1394     struct value *v = lookup_var(expr->ident, state->symtab);
1395     value_ind_t vind = clone_value(v, state);
1396     RET_ON_ERROR;
1397     push_value(vind, state);
1398 }
1399
1400 static void eval_expr(struct expr *expr, struct state *state) {
1401     RET_ON_ERROR;
1402     switch (expr->tag) {
1403     case E_FILTER:
1404         eval_filter(expr, state);
1405         break;
1406     case E_BINARY:
1407         eval_binary(expr, state);
1408         break;
1409     case E_VALUE:
1410         push_value(expr->value_ind, state);
1411         break;
1412     case E_VAR:
1413         eval_var(expr, state);
1414         break;
1415     case E_APP:
1416         eval_app(expr, state);
1417         if (expr->fold) {
1418             /* Do constant folding: replace the function application with
1419              * a reference to the value that resulted from evaluating it */
1420             for (int i=0; i < expr->func->arity; i++)
1421                 free_expr(expr->args[i]);
1422             free(expr->args);
1423             value_ind_t vind = state->values_used - 1;
1424             expr->tag = E_VALUE;
1425             expr->value_ind = state->values[vind];
1426         }
1427         break;
1428     default:
1429         assert(0);
1430     }
1431 }
1432
1433 /*************************************************************************
1434  * Typechecker
1435  *************************************************************************/
1436
1437 static void check_expr(struct expr *expr, struct state *state);
1438
1439 /* Typecheck a list of predicates. A predicate is a function of
1440  * one of the following types:
1441  *
1442  * T_NODESET -> T_BOOLEAN
1443  * T_NUMBER  -> T_BOOLEAN  (position test)
1444  * T_BOOLEAN -> T_BOOLEAN
1445  */
1446 static void check_preds(struct pred *pred, struct state *state) {
1447     if (pred == NULL)
1448         return;
1449     for (int i=0; i < pred->nexpr; i++) {
1450         struct expr *e = pred->exprs[i];
1451         check_expr(e, state);
1452         RET_ON_ERROR;
1453         if (e->type != T_NODESET && e->type != T_NUMBER &&
1454             e->type != T_BOOLEAN && e->type != T_STRING) {
1455             STATE_ERROR(state, PATHX_ETYPE);
1456             return;
1457         }
1458     }
1459 }
1460
1461 static void check_filter(struct expr *expr, struct state *state) {
1462     assert(expr->tag == E_FILTER);
1463     struct locpath *locpath = expr->locpath;
1464
1465     if (expr->primary != NULL) {
1466         check_expr(expr->primary, state);
1467         if (expr->primary->type != T_NODESET) {
1468             STATE_ERROR(state, PATHX_ETYPE);
1469             return;
1470         }
1471         check_preds(expr->predicates, state);
1472         RET_ON_ERROR;
1473     }
1474     list_for_each(s, locpath->steps) {
1475         check_preds(s->predicates, state);
1476         RET_ON_ERROR;
1477     }
1478     expr->type = T_NODESET;
1479 }
1480
1481 static void check_app(struct expr *expr, struct state *state) {
1482     assert(expr->tag == E_APP);
1483
1484     for (int i=0; i < expr->func->arity; i++) {
1485         check_expr(expr->args[i], state);
1486         RET_ON_ERROR;
1487     }
1488
1489     int f;
1490     for (f=0; f < ARRAY_CARDINALITY(builtin_funcs); f++) {
1491         const struct func *fn = builtin_funcs + f;
1492         if (STRNEQ(expr->func->name, fn->name))
1493             continue;
1494         if (expr->func->arity != fn->arity)
1495             continue;
1496
1497         int match = 1;
1498         for (int i=0; i < expr->func->arity; i++) {
1499             if (expr->args[i]->type != fn->arg_types[i]) {
1500                 match = 0;
1501                 break;
1502             }
1503         }
1504         if (match)
1505             break;
1506     }
1507
1508     if (f < ARRAY_CARDINALITY(builtin_funcs)) {
1509         expr->func = builtin_funcs + f;
1510         expr->type = expr->func->type;
1511         expr->fold = expr->func->pure;
1512         if (expr->fold) {
1513             /* We only do constant folding for invocations of pure functions
1514              * whose arguments are literal values. That misses opportunities
1515              * for constant folding, e.g., "regexp('foo' + 'bar')" but is
1516              * a bit simpler than doing full tracking of constants
1517              */
1518             for (int i=0; i < expr->func->arity; i++) {
1519                 if (expr->args[i]->tag != E_VALUE)
1520                     expr->fold = false;
1521             }
1522         }
1523     } else {
1524         STATE_ERROR(state, PATHX_ETYPE);
1525     }
1526 }
1527
1528 /* Check the binary operators. Type rules:
1529  *
1530  * '=', '!='  : T_NODESET -> T_NODESET -> T_BOOLEAN
1531  *              T_STRING  -> T_NODESET -> T_BOOLEAN
1532  *              T_NODESET -> T_STRING  -> T_BOOLEAN
1533  *              T_NUMBER  -> T_NUMBER  -> T_BOOLEAN
1534  *
1535  * '>', '>=',
1536  * '<', '<='  : T_NUMBER -> T_NUMBER -> T_BOOLEAN
1537  *              T_STRING -> T_STRING -> T_BOOLEAN
1538  * '+'        : T_NUMBER -> T_NUMBER -> T_NUMBER
1539  *              T_STRING -> T_STRING -> T_STRING
1540  *              T_REGEXP -> T_REGEXP -> T_REGEXP
1541  * '+', '-', '*': T_NUMBER -> T_NUMBER -> T_NUMBER
1542  *
1543  * 'and', 'or': T_BOOLEAN -> T_BOOLEAN -> T_BOOLEAN
1544  * '=~', '!~' : T_STRING  -> T_REGEXP  -> T_BOOLEAN
1545  *              T_NODESET -> T_REGEXP  -> T_BOOLEAN
1546  *
1547  * '|'        : T_NODESET -> T_NODESET -> T_NODESET
1548  *
1549  * Any type can be coerced to T_BOOLEAN (see coerce_to_bool)
1550  */
1551 static void check_binary(struct expr *expr, struct state *state) {
1552     check_expr(expr->left, state);
1553     check_expr(expr->right, state);
1554     RET_ON_ERROR;
1555
1556     enum type l = expr->left->type;
1557     enum type r = expr->right->type;
1558     int  ok = 1;
1559     enum type res;
1560
1561     switch(expr->op) {
1562     case OP_EQ:
1563     case OP_NEQ:
1564         ok = ((l == T_NODESET || l == T_STRING)
1565               && (r == T_NODESET || r == T_STRING))
1566             || (l == T_NUMBER && r == T_NUMBER);;
1567         res = T_BOOLEAN;
1568         break;
1569     case OP_LT:
1570     case OP_LE:
1571     case OP_GT:
1572     case OP_GE:
1573         ok = (l == T_NUMBER && r == T_NUMBER)
1574             || (l == T_STRING && r == T_STRING);
1575         res = T_BOOLEAN;
1576         break;
1577     case OP_PLUS:
1578         ok = (l == r && (l == T_NUMBER || l == T_STRING || l == T_REGEXP));
1579         res = l;
1580         break;
1581     case OP_MINUS:
1582     case OP_STAR:
1583         ok =  (l == T_NUMBER && r == T_NUMBER);
1584         res = T_NUMBER;
1585         break;
1586     case OP_UNION:
1587         ok = (l == T_NODESET && r == T_NODESET);
1588         res = T_NODESET;
1589         break;
1590     case OP_AND:
1591     case OP_OR:
1592         ok = 1;
1593         res = T_BOOLEAN;
1594         break;
1595     case OP_RE_MATCH:
1596     case OP_RE_NOMATCH:
1597         ok = ((l == T_STRING || l == T_NODESET) && r == T_REGEXP);
1598         res = T_BOOLEAN;
1599         break;
1600     default:
1601         assert(0);
1602     }
1603     if (! ok) {
1604         STATE_ERROR(state, PATHX_ETYPE);
1605     } else {
1606         expr->type = res;
1607     }
1608 }
1609
1610 static void check_var(struct expr *expr, struct state *state) {
1611     struct value *v = lookup_var(expr->ident, state->symtab);
1612     if (v == NULL) {
1613         STATE_ERROR(state, PATHX_ENOVAR);
1614         return;
1615     }
1616     expr->type = v->tag;
1617 }
1618
1619 /* Typecheck an expression */
1620 static void check_expr(struct expr *expr, struct state *state) {
1621     RET_ON_ERROR;
1622     switch(expr->tag) {
1623     case E_FILTER:
1624         check_filter(expr, state);
1625         break;
1626     case E_BINARY:
1627         check_binary(expr, state);
1628         break;
1629     case E_VALUE:
1630         expr->type = expr_value(expr, state)->tag;
1631         break;
1632     case E_VAR:
1633         check_var(expr, state);
1634         break;
1635     case E_APP:
1636         check_app(expr, state);
1637         break;
1638     default:
1639         assert(0);
1640     }
1641 }
1642
1643 /*
1644  * Utility functions for the parser
1645  */
1646
1647 static void skipws(struct state *state) {
1648     while (isspace(*state->pos)) state->pos += 1;
1649 }
1650
1651 static int match(struct state *state, char m) {
1652     skipws(state);
1653
1654     if (*state->pos == '\0')
1655         return 0;
1656     if (*state->pos == m) {
1657         state->pos += 1;
1658         return 1;
1659     }
1660     return 0;
1661 }
1662
1663 static int peek(struct state *state, const char *chars) {
1664     return strchr(chars, *state->pos) != NULL;
1665 }
1666
1667 /* Return 1 if STATE->POS starts with TOKEN, followed by optional
1668  * whitespace, followed by FOLLOW. In that case, STATE->POS is set to the
1669  * first character after FOLLOW. Return 0 otherwise and leave STATE->POS
1670  * unchanged.
1671  */
1672 static int looking_at(struct state *state, const char *token,
1673                       const char *follow) {
1674     if (STREQLEN(state->pos, token, strlen(token))) {
1675         const char *p = state->pos + strlen(token);
1676         while (isspace(*p)) p++;
1677         if (STREQLEN(p, follow, strlen(follow))) {
1678             state->pos = p + strlen(follow);
1679             return 1;
1680         }
1681     }
1682     return 0;
1683 }
1684
1685 /*************************************************************************
1686  * The parser
1687  *************************************************************************/
1688
1689 static void parse_expr(struct state *state);
1690
1691 static struct expr* pop_expr(struct state *state) {
1692     if (state->exprs_used > 0) {
1693         state->exprs_used -= 1;
1694         return state->exprs[state->exprs_used];
1695     } else {
1696         STATE_ERROR(state, PATHX_EINTERNAL);
1697         assert(0);
1698         return NULL;
1699     }
1700 }
1701
1702 static void push_expr(struct expr *expr, struct state *state) {
1703     if (state->exprs_used >= state->exprs_size) {
1704         size_t new_size = 2*state->exprs_size;
1705         if (new_size == 0) new_size = 8;
1706         if (REALLOC_N(state->exprs, new_size) < 0) {
1707             STATE_ENOMEM;
1708             return;
1709         }
1710         state->exprs_size = new_size;
1711     }
1712     state->exprs[state->exprs_used++] = expr;
1713 }
1714
1715 static void push_new_binary_op(enum binary_op op, struct state *state) {
1716     struct expr *expr = NULL;
1717     if (ALLOC(expr) < 0) {
1718         STATE_ENOMEM;
1719         return;
1720     }
1721
1722     expr->tag = E_BINARY;
1723     expr->op  = op;
1724     expr->right = pop_expr(state);
1725     expr->left = pop_expr(state);
1726     push_expr(expr, state);
1727 }
1728
1729 int pathx_escape_name(const char *in, char **out) {
1730     const char *p;
1731     int num_to_escape = 0;
1732     char *s;
1733
1734     *out = NULL;
1735
1736     for (p = in; *p; p++) {
1737         if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1738             num_to_escape += 1;
1739     }
1740
1741     if (num_to_escape == 0)
1742         return 0;
1743
1744     if (ALLOC_N(*out, strlen(in) + num_to_escape + 1) < 0)
1745         return -1;
1746
1747     for (p = in, s = *out; *p; p++) {
1748         if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1749             *s++ = '\\';
1750         *s++ = *p;
1751     }
1752     *s = '\0';
1753     return 0;
1754 }
1755
1756 /* Return true if POS is preceded by an odd number of backslashes, i.e., if
1757  * POS is escaped. Stop the search when we get to START */
1758 static bool backslash_escaped(const char *pos, const char *start) {
1759     bool result=false;
1760     while (pos-- > start && *pos == '\\') {
1761         result = !result;
1762     }
1763     return result;
1764 }
1765
1766 /*
1767  * NameNoWS ::= [^][|/\= \t\n] | \\.
1768  * NameWS   ::= [^][|/\=] | \\.
1769  * Name ::= NameNoWS NameWS* NameNoWS | NameNoWS
1770  */
1771 static char *parse_name(struct state *state) {
1772     const char *s = state->pos;
1773     char *result;
1774
1775     /* Advance state->pos until it points to the first character that is
1776      * not part of a name. */
1777     while (*state->pos != '\0' && strchr(name_follow, *state->pos) == NULL) {
1778         /* Since we allow spaces in names, we need to avoid gobbling up
1779          * stuff that is in follow(Name), e.g. 'or' so that things like
1780          * [name1 or name2] still work. In other words, we'll parse 'x frob
1781          * y' as one name, but for 'x or y', we consider 'x' a name in its
1782          * own right. */
1783         if (STREQLEN(state->pos, " or ", strlen(" or ")) ||
1784             STREQLEN(state->pos, " and ", strlen(" and ")))
1785             break;
1786
1787         if (*state->pos == '\\') {
1788             state->pos += 1;
1789             if (*state->pos == '\0') {
1790                 STATE_ERROR(state, PATHX_ENAME);
1791                 return NULL;
1792             }
1793         }
1794         state->pos += 1;
1795     }
1796
1797     /* Strip trailing white space. Make sure we respect escaped whitespace
1798      * and don't strip it as in "x\\ " */
1799     if (state->pos > s) {
1800         state->pos -= 1;
1801         while (isspace(*state->pos) && state->pos > s
1802                && !backslash_escaped(state->pos, s))
1803             state->pos -= 1;
1804         state->pos += 1;
1805     }
1806
1807     if (state->pos == s) {
1808         STATE_ERROR(state, PATHX_ENAME);
1809         return NULL;
1810     }
1811
1812     result = strndup(s, state->pos - s);
1813     if (result == NULL) {
1814         STATE_ENOMEM;
1815         return NULL;
1816     }
1817
1818     char *p = result;
1819     for (char *t = result; *t != '\0'; t++, p++) {
1820         if (*t == '\\')
1821             t += 1;
1822         *p = *t;
1823     }
1824     *p = '\0';
1825
1826     return result;
1827 }
1828
1829 /*
1830  * Predicate    ::= "[" Expr "]" *
1831  */
1832 static struct pred *parse_predicates(struct state *state) {
1833     struct pred *pred = NULL;
1834     int nexpr = 0;
1835
1836     while (match(state, L_BRACK)) {
1837         parse_expr(state);
1838         nexpr += 1;
1839         RET0_ON_ERROR;
1840
1841         if (! match(state, R_BRACK)) {
1842             STATE_ERROR(state, PATHX_EPRED);
1843             return NULL;
1844         }
1845         skipws(state);
1846     }
1847
1848     if (nexpr == 0)
1849         return NULL;
1850
1851     if (ALLOC(pred) < 0) {
1852         STATE_ENOMEM;
1853         return NULL;
1854     }
1855     pred->nexpr = nexpr;
1856
1857     if (ALLOC_N(pred->exprs, nexpr) < 0) {
1858         free_pred(pred);
1859         STATE_ENOMEM;
1860         return NULL;
1861     }
1862
1863     for (int i = nexpr - 1; i >= 0; i--)
1864         pred->exprs[i] = pop_expr(state);
1865
1866     return pred;
1867 }
1868
1869 /*
1870  * Step ::= AxisSpecifier NameTest Predicate* | '.' | '..'
1871  * AxisSpecifier ::= AxisName '::' | <epsilon>
1872  * AxisName ::= 'ancestor'
1873  *            | 'ancestor-or-self'
1874  *            | 'child'
1875  *            | 'descendant'
1876  *            | 'descendant-or-self'
1877  *            | 'parent'
1878  *            | 'self'
1879  *            | 'root'
1880  */
1881 static struct step *parse_step(struct state *state) {
1882     struct step *step;
1883     int explicit_axis = 0, allow_predicates = 1;
1884
1885     if (ALLOC(step) < 0) {
1886         STATE_ENOMEM;
1887         return NULL;
1888     }
1889
1890     step->axis = CHILD;
1891     for (int i = 0; i < ARRAY_CARDINALITY(axis_names); i++) {
1892         if (looking_at(state, axis_names[i], "::")) {
1893             step->axis = i;
1894             explicit_axis = 1;
1895             break;
1896         }
1897     }
1898
1899     if (! match(state, '*')) {
1900         step->name = parse_name(state);
1901         if (HAS_ERROR(state))
1902             goto error;
1903         if (! explicit_axis) {
1904             if (STREQ(step->name, ".") || STREQ(step->name, "..")) {
1905                 step->axis = STREQ(step->name, ".") ? SELF : PARENT;
1906                 FREE(step->name);
1907                 allow_predicates = 0;
1908             }
1909         }
1910     }
1911
1912     if (allow_predicates) {
1913         step->predicates = parse_predicates(state);
1914         if (HAS_ERROR(state))
1915             goto error;
1916     }
1917
1918     return step;
1919
1920  error:
1921     free_step(step);
1922     return NULL;
1923 }
1924
1925 static struct step *make_step(enum axis axis, struct state *state) {
1926     struct step *result = NULL;
1927
1928     if (ALLOC(result) < 0) {
1929         STATE_ENOMEM;
1930         return NULL;
1931     }
1932     result->axis = axis;
1933     return result;
1934 }
1935
1936 /*
1937  * RelativeLocationPath ::= Step
1938  *                        | RelativeLocationPath '/' Step
1939  *                        | AbbreviatedRelativeLocationPath
1940  * AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step
1941  *
1942  * The above is the same as
1943  * RelativeLocationPath ::= Step ('/' Step | '//' Step)*
1944  */
1945 static struct locpath *
1946 parse_relative_location_path(struct state *state) {
1947     struct step *step = NULL;
1948     struct locpath *locpath = NULL;
1949
1950     step = parse_step(state);
1951     if (HAS_ERROR(state))
1952         goto error;
1953
1954     if (ALLOC(locpath) < 0) {
1955         STATE_ENOMEM;
1956         goto error;
1957     }
1958     list_append(locpath->steps, step);
1959     step = NULL;
1960
1961     while (match(state, '/')) {
1962         if (*state->pos == '/') {
1963             state->pos += 1;
1964             step = make_step(DESCENDANT_OR_SELF, state);
1965             if (step == NULL) {
1966                 STATE_ENOMEM;
1967                 goto error;
1968             }
1969             list_append(locpath->steps, step);
1970         }
1971         step = parse_step(state);
1972         if (HAS_ERROR(state))
1973             goto error;
1974         list_append(locpath->steps, step);
1975         step = NULL;
1976     }
1977     return locpath;
1978
1979  error:
1980     free_step(step);
1981     free_locpath(locpath);
1982     return NULL;
1983 }
1984
1985 /*
1986  * LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
1987  * AbsoluteLocationPath ::= '/' RelativeLocationPath?
1988  *                        | AbbreviatedAbsoluteLocationPath
1989  * AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath
1990  *
1991  */
1992 static void parse_location_path(struct state *state) {
1993     struct expr *expr = NULL;
1994     struct locpath *locpath = NULL;
1995
1996     if (match(state, '/')) {
1997         if (*state->pos == '/') {
1998             state->pos += 1;
1999             locpath = parse_relative_location_path(state);
2000             if (HAS_ERROR(state))
2001                 goto error;
2002             struct step *step = make_step(DESCENDANT_OR_SELF, state);
2003             if (HAS_ERROR(state))
2004                 goto error;
2005             list_cons(locpath->steps, step);
2006         } else {
2007             if (*state->pos != '\0') {
2008                 locpath = parse_relative_location_path(state);
2009             } else {
2010                 if (ALLOC(locpath) < 0)
2011                     goto err_nomem;
2012             }
2013             struct step *step = make_step(ROOT, state);
2014             if (HAS_ERROR(state)) {
2015                 free_step(step);
2016                 goto error;
2017             }
2018             list_cons(locpath->steps, step);
2019         }
2020     } else {
2021         locpath = parse_relative_location_path(state);
2022     }
2023
2024     if (ALLOC(expr) < 0)
2025         goto err_nomem;
2026     expr->tag = E_FILTER;
2027     expr->locpath = locpath;
2028     push_expr(expr, state);
2029     return;
2030
2031  err_nomem:
2032     STATE_ENOMEM;
2033  error:
2034     free_expr(expr);
2035     free_locpath(locpath);
2036     return;
2037 }
2038
2039 /*
2040  * Number       ::= /[0-9]+/
2041  */
2042 static void parse_number(struct state *state) {
2043     struct expr *expr = NULL;
2044     unsigned long val;
2045     char *end;
2046
2047     errno = 0;
2048     val = strtoul(state->pos, &end, 10);
2049     if (errno || end == state->pos || (int) val != val) {
2050         STATE_ERROR(state, PATHX_ENUMBER);
2051         return;
2052     }
2053
2054     state->pos = end;
2055
2056     if (ALLOC(expr) < 0)
2057         goto err_nomem;
2058     expr->tag = E_VALUE;
2059     expr->value_ind = make_value(T_NUMBER, state);
2060     if (HAS_ERROR(state))
2061         goto error;
2062     expr_value(expr, state)->number = val;
2063
2064     push_expr(expr, state);
2065     return;
2066
2067  err_nomem:
2068     STATE_ENOMEM;
2069  error:
2070     free_expr(expr);
2071     return;
2072 }
2073
2074 /*
2075  * Literal ::= '"' /[^"]* / '"' | "'" /[^']* / "'"
2076  */
2077 static void parse_literal(struct state *state) {
2078     char delim;
2079     const char *s;
2080     struct expr *expr = NULL;
2081
2082     if (*state->pos == '"')
2083         delim = '"';
2084     else if (*state->pos == '\'')
2085         delim = '\'';
2086     else {
2087         STATE_ERROR(state, PATHX_ESTRING);
2088         return;
2089     }
2090     state->pos += 1;
2091
2092     s = state->pos;
2093     while (*state->pos != '\0' && *state->pos != delim) state->pos += 1;
2094
2095     if (*state->pos != delim) {
2096         STATE_ERROR(state, PATHX_EDELIM);
2097         return;
2098     }
2099     state->pos += 1;
2100
2101     if (ALLOC(expr) < 0)
2102         goto err_nomem;
2103     expr->tag = E_VALUE;
2104     expr->value_ind = make_value(T_STRING, state);
2105     if (HAS_ERROR(state))
2106         goto error;
2107     expr_value(expr, state)->string = strndup(s, state->pos - s - 1);
2108     if (expr_value(expr, state)->string == NULL)
2109         goto err_nomem;
2110
2111     push_expr(expr, state);
2112     return;
2113
2114  err_nomem:
2115     STATE_ENOMEM;
2116  error:
2117     free_expr(expr);
2118     return;
2119 }
2120
2121 /*
2122  * FunctionCall ::= Name '(' ( Expr ( ',' Expr )* )? ')'
2123  */
2124 static void parse_function_call(struct state *state) {
2125     const struct func *func = NULL;
2126     struct expr *expr = NULL;
2127     int nargs = 0, find = 0;
2128
2129     for (; find < ARRAY_CARDINALITY(builtin_funcs); find++) {
2130         if (looking_at(state, builtin_funcs[find].name, "(")) {
2131             func = builtin_funcs + find;
2132             break;
2133         }
2134     }
2135     if (func == NULL) {
2136         STATE_ERROR(state, PATHX_ENAME);
2137         return;
2138     }
2139
2140     if (! match(state, ')')) {
2141         do {
2142             nargs += 1;
2143             parse_expr(state);
2144             RET_ON_ERROR;
2145         } while (match(state, ','));
2146
2147         if (! match(state, ')')) {
2148             STATE_ERROR(state, PATHX_EPAREN);
2149             return;
2150         }
2151     }
2152
2153     int found = 0; /* Whether there is a builtin matching in name and arity */
2154     for (int i=find; i < ARRAY_CARDINALITY(builtin_funcs); i++) {
2155         if (STRNEQ(func->name, builtin_funcs[i].name))
2156             break;
2157         if (builtin_funcs[i].arity == nargs) {
2158             func = builtin_funcs + i;
2159             found = 1;
2160             break;
2161         }
2162     }
2163
2164     if (! found) {
2165         STATE_ERROR(state, PATHX_EARITY);
2166         return;
2167     }
2168
2169     if (ALLOC(expr) < 0) {
2170         STATE_ENOMEM;
2171         return;
2172     }
2173     expr->tag = E_APP;
2174     if (ALLOC_N(expr->args, nargs) < 0) {
2175         free_expr(expr);
2176         STATE_ENOMEM;
2177         return;
2178     }
2179     expr->func = func;
2180     for (int i = nargs - 1; i >= 0; i--)
2181         expr->args[i] = pop_expr(state);
2182
2183     push_expr(expr, state);
2184 }
2185
2186 /*
2187  * VariableReference ::= '$' /[a-zA-Z_][a-zA-Z0-9_]* /
2188  *
2189  * The '$' is consumed by parse_primary_expr
2190  */
2191 static void parse_var(struct state *state) {
2192     const char *id = state->pos;
2193     struct expr *expr = NULL;
2194
2195     if (!isalpha(*id) && *id != '_') {
2196         STATE_ERROR(state, PATHX_ENAME);
2197         return;
2198     }
2199     id++;
2200     while (isalpha(*id) || isdigit(*id) || *id == '_')
2201         id += 1;
2202
2203     if (ALLOC(expr) < 0)
2204         goto err_nomem;
2205     expr->tag = E_VAR;
2206     expr->ident = strndup(state->pos, id - state->pos);
2207     if (expr->ident == NULL)
2208         goto err_nomem;
2209
2210     push_expr(expr, state);
2211     state->pos = id;
2212     return;
2213  err_nomem:
2214     STATE_ENOMEM;
2215     free_expr(expr);
2216     return;
2217 }
2218
2219 /*
2220  * PrimaryExpr ::= Literal
2221  *               | Number
2222  *               | FunctionCall
2223  *               | VariableReference
2224  *               | '(' Expr ')'
2225  *
2226  */
2227 static void parse_primary_expr(struct state *state) {
2228     if (peek(state, "'\"")) {
2229         parse_literal(state);
2230     } else if (peek(state, "0123456789")) {
2231         parse_number(state);
2232     } else if (match(state, '(')) {
2233         parse_expr(state);
2234         RET_ON_ERROR;
2235         if (! match(state, ')')) {
2236             STATE_ERROR(state, PATHX_EPAREN);
2237             return;
2238         }
2239     } else if (match(state, '$')) {
2240         parse_var(state);
2241     } else {
2242         parse_function_call(state);
2243     }
2244 }
2245
2246 static int looking_at_primary_expr(struct state *state) {
2247     const char *s = state->pos;
2248     /* Is it a Number, Literal or VariableReference ? */
2249     if (peek(state, "$'\"0123456789"))
2250         return 1;
2251
2252     /* Or maybe a function call, i.e. a word followed by a '(' ?
2253      * Note that our function names are only [a-zA-Z]+
2254      */
2255     while (*s != '\0' && isalpha(*s)) s++;
2256     while (*s != '\0' && isspace(*s)) s++;
2257     return *s == '(';
2258 }
2259
2260 /*
2261  * PathExpr ::= LocationPath
2262  *            | FilterExpr
2263  *            | FilterExpr '/' RelativeLocationPath
2264  *            | FilterExpr '//' RelativeLocationPath
2265  *
2266  * FilterExpr ::= PrimaryExpr Predicate
2267  *
2268  * The grammar is ambiguous here: the expression '42' can either be the
2269  * number 42 (a PrimaryExpr) or the RelativeLocationPath 'child::42'. The
2270  * reason for this ambiguity is that we allow node names like '42' in the
2271  * tree; rather than forbid them, we resolve the ambiguity by always
2272  * parsing '42' as a number, and requiring that the user write the
2273  * RelativeLocationPath in a different form, e.g. 'child::42' or './42'.
2274  */
2275 static void parse_path_expr(struct state *state) {
2276     struct expr *expr = NULL;
2277     struct pred *predicates = NULL;
2278     struct locpath *locpath = NULL;
2279
2280     if (looking_at_primary_expr(state)) {
2281         parse_primary_expr(state);
2282         RET_ON_ERROR;
2283         predicates = parse_predicates(state);
2284         RET_ON_ERROR;
2285         if (match(state, '/')) {
2286             if (match(state, '/')) {
2287                 locpath = parse_relative_location_path(state);
2288                 if (HAS_ERROR(state))
2289                     goto error;
2290
2291                 struct step *step = make_step(DESCENDANT_OR_SELF, state);
2292                 if (HAS_ERROR(state))
2293                     return;
2294                 list_cons(locpath->steps, step);
2295             } else {
2296                 if (*state->pos == '\0') {
2297                     STATE_ERROR(state, PATHX_EEND);
2298                     goto error;
2299                 }
2300                 locpath = parse_relative_location_path(state);
2301             }
2302         }
2303         /* A PathExpr without predicates and locpath is
2304          * just a PrimaryExpr
2305          */
2306         if (predicates == NULL && locpath == NULL)
2307             return;
2308         /* To make evaluation easier, we parse something like
2309          *   $var[pred] as $var[pred]/.
2310          */
2311         if (locpath == NULL) {
2312             if (ALLOC(locpath) < 0)
2313                 goto error;
2314             if (ALLOC(locpath->steps) < 0)
2315                 goto error;
2316             locpath->steps->axis = SELF;
2317         }
2318         if (ALLOC(expr) < 0)
2319             goto error;
2320         expr->tag = E_FILTER;
2321         expr->predicates = predicates;
2322         expr->primary    = pop_expr(state);
2323         expr->locpath    = locpath;
2324         push_expr(expr, state);
2325     } else {
2326         parse_location_path(state);
2327     }
2328     return;
2329  error:
2330     free_expr(expr);
2331     free_pred(predicates);
2332     free_locpath(locpath);
2333     return;
2334 }
2335
2336 /*
2337  * UnionExpr ::= PathExpr ('|' PathExpr)*
2338  */
2339 static void parse_union_expr(struct state *state) {
2340     parse_path_expr(state);
2341     RET_ON_ERROR;
2342     while (match(state, '|')) {
2343         parse_path_expr(state);
2344         RET_ON_ERROR;
2345         push_new_binary_op(OP_UNION, state);
2346     }
2347 }
2348
2349 /*
2350  * MultiplicativeExpr ::= UnionExpr ('*' UnionExpr)*
2351  */
2352 static void parse_multiplicative_expr(struct state *state) {
2353     parse_union_expr(state);
2354     RET_ON_ERROR;
2355     while (match(state, '*')) {
2356         parse_union_expr(state);
2357         RET_ON_ERROR;
2358         push_new_binary_op(OP_STAR, state);
2359     }
2360 }
2361
2362 /*
2363  * AdditiveExpr ::= MultiplicativeExpr (AdditiveOp MultiplicativeExpr)*
2364  * AdditiveOp   ::= '+' | '-'
2365  */
2366 static void parse_additive_expr(struct state *state) {
2367     parse_multiplicative_expr(state);
2368     RET_ON_ERROR;
2369     while (*state->pos == '+' || *state->pos == '-') {
2370         enum binary_op op = (*state->pos == '+') ? OP_PLUS : OP_MINUS;
2371         state->pos += 1;
2372         skipws(state);
2373         parse_multiplicative_expr(state);
2374         RET_ON_ERROR;
2375         push_new_binary_op(op, state);
2376     }
2377 }
2378
2379 /*
2380  * RelationalExpr ::= AdditiveExpr (RelationalOp AdditiveExpr)?
2381  * RelationalOp ::= ">" | "<" | ">=" | "<="
2382  */
2383 static void parse_relational_expr(struct state *state) {
2384     parse_additive_expr(state);
2385     RET_ON_ERROR;
2386     if (*state->pos == '<' || *state->pos == '>') {
2387         enum binary_op op = (*state->pos == '<') ? OP_LT : OP_GT;
2388         state->pos += 1;
2389         if (*state->pos == '=') {
2390             op = (op == OP_LT) ? OP_LE : OP_GE;
2391             state->pos += 1;
2392         }
2393         skipws(state);
2394         parse_additive_expr(state);
2395         RET_ON_ERROR;
2396         push_new_binary_op(op, state);
2397     }
2398 }
2399
2400 /*
2401  * EqualityExpr ::= RelationalExpr (EqualityOp RelationalExpr)? | ReMatchExpr
2402  * EqualityOp ::= "=" | "!="
2403  * ReMatchExpr ::= RelationalExpr MatchOp RelationalExpr
2404  * MatchOp ::= "=~" | "!~"
2405  */
2406 static void parse_equality_expr(struct state *state) {
2407     parse_relational_expr(state);
2408     RET_ON_ERROR;
2409     if ((*state->pos == '=' || *state->pos == '!') && state->pos[1] == '~') {
2410         enum binary_op op = (*state->pos == '=') ? OP_RE_MATCH : OP_RE_NOMATCH;
2411         state->pos += 2;
2412         skipws(state);
2413         parse_relational_expr(state);
2414         RET_ON_ERROR;
2415         push_new_binary_op(op, state);
2416     } else if (*state->pos == '=' ||
2417         (*state->pos == '!' && state->pos[1] == '=')) {
2418         enum binary_op op = (*state->pos == '=') ? OP_EQ : OP_NEQ;
2419         state->pos += (op == OP_EQ) ? 1 : 2;
2420         skipws(state);
2421         parse_relational_expr(state);
2422         RET_ON_ERROR;
2423         push_new_binary_op(op, state);
2424     }
2425 }
2426
2427 /*
2428  * AndExpr ::= EqualityExpr ('and' EqualityExpr)*
2429  */
2430 static void parse_and_expr(struct state *state) {
2431     parse_equality_expr(state);
2432     RET_ON_ERROR;
2433     while (*state->pos == 'a' && state->pos[1] == 'n'
2434         && state->pos[2] == 'd') {
2435         state->pos += 3;
2436         skipws(state);
2437         parse_equality_expr(state);
2438         RET_ON_ERROR;
2439         push_new_binary_op(OP_AND, state);
2440     }
2441 }
2442
2443 /*
2444  * OrExpr ::= AndExpr ('or' AndExpr)*
2445  */
2446 static void parse_or_expr(struct state *state) {
2447     parse_and_expr(state);
2448     RET_ON_ERROR;
2449     while (*state->pos == 'o' && state->pos[1] == 'r') {
2450         state->pos += 2;
2451         skipws(state);
2452         parse_and_expr(state);
2453         RET_ON_ERROR;
2454         push_new_binary_op(OP_OR, state);
2455     }
2456 }
2457
2458 /*
2459  * Expr ::= OrExpr
2460  */
2461 static void parse_expr(struct state *state) {
2462     skipws(state);
2463     parse_or_expr(state);
2464 }
2465
2466 static void store_error(struct pathx *pathx) {
2467     const char *pathx_msg = NULL;
2468     const char *path = pathx->state->txt;
2469     const pathx_errcode_t errcode = pathx->state->errcode;
2470     struct error *err = pathx->state->error;
2471
2472     char *pos_str = pathx->state->errmsg;
2473     pathx->state->errmsg = NULL;
2474
2475     if (err == NULL || errcode == PATHX_NOERROR || err->code != AUG_NOERROR)
2476         return;
2477
2478     switch (errcode) {
2479     case PATHX_ENOMEM:
2480         err->code = AUG_ENOMEM;
2481         break;
2482     case PATHX_EMMATCH:
2483         err->code = AUG_EMMATCH;
2484         break;
2485     case PATHX_ENOMATCH:
2486         err->code = AUG_ENOMATCH;
2487         break;
2488     default:
2489         err->code = AUG_EPATHX;
2490         break;
2491     }
2492
2493     /* We only need details for pathx syntax errors */
2494     if (err->code != AUG_EPATHX)
2495         return;
2496
2497     int pos;
2498     pathx_msg = pathx_error(pathx, NULL, &pos);
2499
2500     bool has_msg = pos_str != NULL;
2501     int pos_str_len = pos_str == NULL ? 0 : strlen(pos_str);
2502     if (REALLOC_N(pos_str, pos_str_len + strlen(path) + 8) >= 0) {
2503         if (has_msg) {
2504             strcat(pos_str, " in ");
2505             strncat(pos_str, path, pos);
2506         } else {
2507             /* initialize pos_str explicitly, path might be "" */
2508             pos_str[0] = '\0';
2509             strncat(pos_str, path, pos);
2510         }
2511         strcat(pos_str, "|=|");
2512         strcat(pos_str, path + pos);
2513     }
2514
2515     err->minor = errcode;
2516     err->details = pos_str;
2517     pos_str = NULL;
2518     err->minor_details = pathx_msg;
2519 }
2520
2521 int pathx_parse(const struct tree *tree,
2522                 struct error *err,
2523                 const char *txt,
2524                 bool need_nodeset,
2525                 struct pathx_symtab *symtab,
2526                 struct tree *root_ctx,
2527                 struct pathx **pathx) {
2528     struct state *state = NULL;
2529
2530     *pathx = NULL;
2531
2532     if (ALLOC(*pathx) < 0)
2533         goto oom;
2534
2535     (*pathx)->origin = (struct tree *) tree;
2536
2537     /* Set up state */
2538     if (ALLOC((*pathx)->state) < 0)
2539         goto oom;
2540     state = (*pathx)->state;
2541
2542     state->errcode = PATHX_NOERROR;
2543     state->errmsg = NULL;
2544     state->txt = txt;
2545     state->pos = txt;
2546     state->symtab = symtab;
2547     state->root_ctx = root_ctx;
2548     state->error = err;
2549
2550     if (ALLOC_N(state->value_pool, 8) < 0) {
2551         STATE_ENOMEM;
2552         goto done;
2553     }
2554     state->value_pool_size = 8;
2555     state->value_pool[0].tag = T_BOOLEAN;
2556     state->value_pool[0].boolval = 0;
2557     state->value_pool[1].tag = T_BOOLEAN;
2558     state->value_pool[1].boolval = 1;
2559     state->value_pool_used = 2;
2560
2561     /* Parse */
2562     parse_expr(state);
2563     if (HAS_ERROR(state))
2564         goto done;
2565     if (state->pos != state->txt + strlen(state->txt)) {
2566         STATE_ERROR(state, PATHX_EEND);
2567         goto done;
2568     }
2569
2570     if (state->exprs_used != 1) {
2571         STATE_ERROR(state, PATHX_EINTERNAL);
2572         goto done;
2573     }
2574
2575     /* Typecheck */
2576     check_expr(state->exprs[0], state);
2577     if (HAS_ERROR(state))
2578         goto done;
2579
2580     if (need_nodeset && state->exprs[0]->type != T_NODESET) {
2581         STATE_ERROR(state, PATHX_ETYPE);
2582         goto done;
2583     }
2584
2585  done:
2586     store_error(*pathx);
2587     return state->errcode;
2588  oom:
2589     free_pathx(*pathx);
2590     *pathx = NULL;
2591     if (err != NULL)
2592         err->code = AUG_ENOMEM;
2593     return PATHX_ENOMEM;
2594 }
2595
2596 /*************************************************************************
2597  * Searching in the tree
2598  *************************************************************************/
2599
2600 static bool step_matches(struct step *step, struct tree *tree) {
2601     if (step->name == NULL) {
2602         return step->axis == ROOT || tree->label != NULL;
2603     } else {
2604         return streqx(step->name, tree->label);
2605     }
2606 }
2607
2608 static struct tree *tree_prev(struct tree *pos) {
2609     struct tree *node = NULL;
2610     if (pos != pos->parent->children) {
2611         for (node = pos->parent->children;
2612              node->next != pos;
2613              node = node->next);
2614     }
2615     return node;
2616 }
2617
2618 /* When the first step doesn't begin with ROOT then use relative root context
2619  * instead. */
2620 static struct tree *step_root(struct step *step, struct tree *ctx,
2621                               struct tree *root_ctx) {
2622     struct tree *node = NULL;
2623     switch (step->axis) {
2624     case SELF:
2625     case CHILD:
2626     case DESCENDANT:
2627     case PARENT:
2628     case ANCESTOR:
2629     case PRECEDING_SIBLING:
2630     case FOLLOWING_SIBLING:
2631         /* only use root_ctx when ctx is the absolute tree root */
2632         if (ctx == ctx->parent && root_ctx != NULL)
2633             node = root_ctx;
2634         else
2635             node = ctx;
2636         break;
2637     case ROOT:
2638     case DESCENDANT_OR_SELF:
2639         node = ctx;
2640         break;
2641     default:
2642         assert(0);
2643     }
2644     if (node == NULL)
2645         return NULL;
2646     return node;
2647 }
2648
2649 static struct tree *step_first(struct step *step, struct tree *ctx) {
2650     struct tree *node = NULL;
2651     switch (step->axis) {
2652     case SELF:
2653     case DESCENDANT_OR_SELF:
2654         node = ctx;
2655         break;
2656     case CHILD:
2657     case DESCENDANT:
2658         node = ctx->children;
2659         break;
2660     case PARENT:
2661     case ANCESTOR:
2662         node = ctx->parent;
2663         break;
2664     case ROOT:
2665         while (ctx->parent != ctx)
2666             ctx = ctx->parent;
2667         node = ctx;
2668         break;
2669     case PRECEDING_SIBLING:
2670         node = tree_prev(ctx);
2671         break;
2672     case FOLLOWING_SIBLING:
2673         node = ctx->next;
2674         break;
2675     default:
2676         assert(0);
2677     }
2678     if (node == NULL)
2679         return NULL;
2680     if (step_matches(step, node))
2681         return node;
2682     return step_next(step, ctx, node);
2683 }
2684
2685 static struct tree *step_next(struct step *step, struct tree *ctx,
2686                               struct tree *node) {
2687     while (node != NULL) {
2688         switch (step->axis) {
2689         case SELF:
2690             node = NULL;
2691             break;
2692         case CHILD:
2693             node = node->next;
2694             break;
2695         case DESCENDANT:
2696         case DESCENDANT_OR_SELF:
2697             if (node->children != NULL) {
2698                 node = node->children;
2699             } else {
2700                 while (node->next == NULL && node != ctx)
2701                     node = node->parent;
2702                 if (node == ctx)
2703                     node = NULL;
2704                 else
2705                     node = node->next;
2706             }
2707             break;
2708         case PARENT:
2709         case ROOT:
2710             node = NULL;
2711             break;
2712         case ANCESTOR:
2713             if (node->parent == node)
2714                 node = NULL;
2715             else
2716                 node = node->parent;
2717             break;
2718         case PRECEDING_SIBLING:
2719             node = tree_prev(node);
2720             break;
2721         case FOLLOWING_SIBLING:
2722             node = node->next;
2723             break;
2724         default:
2725             assert(0);
2726         }
2727         if (node != NULL && step_matches(step, node))
2728             break;
2729     }
2730     return node;
2731 }
2732
2733 static struct value *pathx_eval(struct pathx *pathx) {
2734     struct state *state = pathx->state;
2735     state->ctx = pathx->origin;
2736     state->ctx_pos = 1;
2737     state->ctx_len = 1;
2738     eval_expr(state->exprs[0], state);
2739     if (HAS_ERROR(state))
2740         return NULL;
2741
2742     if (state->values_used != 1) {
2743         STATE_ERROR(state, PATHX_EINTERNAL);
2744         return NULL;
2745     }
2746     return pop_value(state);
2747 }
2748
2749 struct tree *pathx_next(struct pathx *pathx) {
2750     if (pathx->node + 1 < pathx->nodeset->used)
2751         return pathx->nodeset->nodes[++pathx->node];
2752     return NULL;
2753 }
2754
2755 /* Find the first node in TREE matching PATH. */
2756 struct tree *pathx_first(struct pathx *pathx) {
2757     if (pathx->nodeset == NULL) {
2758         struct value *v = pathx_eval(pathx);
2759
2760         if (HAS_ERROR(pathx->state))
2761             goto error;
2762         assert(v->tag == T_NODESET);
2763         pathx->nodeset = v->nodeset;
2764     }
2765     pathx->node = 0;
2766     if (pathx->nodeset->used == 0)
2767         return NULL;
2768     else
2769         return pathx->nodeset->nodes[0];
2770  error:
2771     store_error(pathx);
2772     return NULL;
2773 }
2774
2775 /* Find a node in the tree that matches the longest prefix of PATH.
2776  *
2777  * Return 1 if a node was found that exactly matches PATH, 0 if an incomplete
2778  * prefix matches, and -1 if more than one node in the tree match.
2779  *
2780  * TMATCH is set to the tree node that matches, and SMATCH to the next step
2781  * after the one where TMATCH matched. If no node matches or multiple nodes
2782  * at the same depth match, TMATCH and SMATCH will be NULL. When exactly
2783  * one node matches, TMATCH will be that node, and SMATCH will be NULL.
2784  */
2785 static int locpath_search(struct locpath_trace *lpt,
2786                           struct tree **tmatch, struct step **smatch) {
2787     int last;
2788     int result = -1;
2789
2790     for (last=lpt->maxns; last >= 0 && lpt->ns[last]->used == 0; last--);
2791     if (last < 0) {
2792         *smatch = lpt->lp->steps;
2793         result = 1;
2794         goto done;
2795     }
2796     if (lpt->ns[last]->used > 1) {
2797         result = -1;
2798         goto done;
2799     }
2800     result = 0;
2801     *tmatch = lpt->ns[last]->nodes[0];
2802     *smatch = lpt->lp->steps;
2803     for (int i=0; i < last; i++)
2804         *smatch = (*smatch)->next;
2805  done:
2806     for (int i=0; i < lpt->maxns; i++)
2807         free_nodeset(lpt->ns[i]);
2808     FREE(lpt->ns);
2809     return result;
2810 }
2811
2812 /* Expand the tree ROOT so that it contains all components of PATH. PATH
2813  * must have been initialized against ROOT by a call to PATH_FIND_ONE.
2814  *
2815  * Return the first segment that was created by this operation, or NULL on
2816  * error.
2817  */
2818 int pathx_expand_tree(struct pathx *path, struct tree **tree) {
2819     int r;
2820     struct step *step = NULL;
2821     struct locpath_trace lpt;
2822     struct tree *first_child = NULL;
2823     struct value *v = NULL;
2824
2825     MEMZERO(&lpt, 1);
2826     path->state->locpath_trace = &lpt;
2827     v = pathx_eval(path);
2828     path->state->locpath_trace = NULL;
2829     if (HAS_ERROR(path->state))
2830         goto error;
2831
2832     if (lpt.maxns == 0) {
2833         if (v->tag != T_NODESET || v->nodeset->used == 0) {
2834             STATE_ERROR(path->state, PATHX_ENOMATCH);
2835             goto error;
2836         }
2837         if (v->nodeset->used > 1)
2838             goto error;
2839         *tree = v->nodeset->nodes[0];
2840         return 0;
2841     }
2842
2843     *tree = path->origin;
2844     r = locpath_search(&lpt, tree, &step);
2845     if (r == -1) {
2846         STATE_ERROR(path->state, PATHX_EMMATCH);
2847         goto error;
2848     }
2849
2850     if (step == NULL)
2851         return 0;
2852
2853     struct tree *parent = *tree;
2854     if (parent == NULL)
2855         parent = path->origin;
2856
2857     list_for_each(s, step) {
2858         if (s->name == NULL || s->axis != CHILD)
2859             goto error;
2860         struct tree *t = make_tree(strdup(s->name), NULL, parent, NULL);
2861         if (first_child == NULL)
2862             first_child = t;
2863         if (t == NULL || t->label == NULL)
2864             goto error;
2865         list_append(parent->children, t);
2866         parent = t;
2867     }
2868
2869     while (first_child->children != NULL)
2870         first_child = first_child->children;
2871
2872     *tree = first_child;
2873     return 1;
2874
2875  error:
2876     if (first_child != NULL) {
2877         list_remove(first_child, first_child->parent->children);
2878         free_tree(first_child);
2879     }
2880     *tree = NULL;
2881     store_error(path);
2882     return -1;
2883 }
2884
2885 int pathx_find_one(struct pathx *path, struct tree **tree) {
2886     *tree = pathx_first(path);
2887     if (HAS_ERROR(path->state))
2888         return -1;
2889     return path->nodeset->used;
2890 }
2891
2892 struct error *err_of_pathx(struct pathx *px) {
2893     return px->state->error;
2894 }
2895
2896 const char *pathx_error(struct pathx *path, const char **txt, int *pos) {
2897     int errcode = PATHX_ENOMEM;
2898
2899     if (path != NULL) {
2900         if (path->state->errcode < ARRAY_CARDINALITY(errcodes))
2901             errcode = path->state->errcode;
2902         else
2903             errcode = PATHX_EINTERNAL;
2904
2905         if (txt)
2906             *txt = path->state->txt;
2907
2908         if (pos)
2909             *pos = path->state->pos - path->state->txt;
2910     }
2911     return errcodes[errcode];
2912 }
2913
2914 /*
2915  * Symbol tables
2916  */
2917 static struct pathx_symtab
2918 *make_symtab(struct pathx_symtab *symtab, const char *name,
2919              struct value *value)
2920 {
2921     struct pathx_symtab *new;
2922     char *n = NULL;
2923
2924     n = strdup(name);
2925     if (n == NULL)
2926         return NULL;
2927
2928     if (ALLOC(new) < 0) {
2929         free(n);
2930         return NULL;
2931     }
2932     new->name = n;
2933     new->value = value;
2934     if (symtab == NULL) {
2935         return new;
2936     } else {
2937         new->next = symtab->next;
2938         symtab->next = new;
2939     }
2940     return symtab;
2941 }
2942
2943 void free_symtab(struct pathx_symtab *symtab) {
2944
2945     while (symtab != NULL) {
2946         struct pathx_symtab *del = symtab;
2947         symtab = del->next;
2948         free(del->name);
2949         release_value(del->value);
2950         free(del->value);
2951         free(del);
2952     }
2953 }
2954
2955 struct pathx_symtab *pathx_get_symtab(struct pathx *pathx) {
2956     return pathx->state->symtab;
2957 }
2958
2959 static int pathx_symtab_set(struct pathx_symtab **symtab,
2960                             const char *name, struct value *v) {
2961     int found = 0;
2962
2963     list_for_each(tab, *symtab) {
2964         if (STREQ(tab->name, name)) {
2965             release_value(tab->value);
2966             free(tab->value);
2967             tab->value = v;
2968             found = 1;
2969             break;
2970         }
2971     }
2972
2973     if (!found) {
2974         struct pathx_symtab *new = NULL;
2975
2976         new = make_symtab(*symtab, name, v);
2977         if (new == NULL)
2978             goto error;
2979         *symtab = new;
2980     }
2981     return 0;
2982  error:
2983     return -1;
2984 }
2985
2986 int pathx_symtab_define(struct pathx_symtab **symtab,
2987                         const char *name, struct pathx *px) {
2988     int r;
2989     struct value *value = NULL, *v = NULL;
2990     struct state *state = px->state;
2991
2992     value = pathx_eval(px);
2993     if (HAS_ERROR(px->state))
2994         goto error;
2995
2996     if (ALLOC(v) < 0) {
2997         STATE_ENOMEM;
2998         goto error;
2999     }
3000
3001     *v = *value;
3002     value->tag = T_BOOLEAN;
3003
3004     r = pathx_symtab_set(symtab, name, v);
3005     if (r < 0) {
3006         STATE_ENOMEM;
3007         goto error;
3008     }
3009
3010     if (v->tag == T_NODESET)
3011         return v->nodeset->used;
3012     else
3013         return 0;
3014  error:
3015     release_value(value);
3016     free(value);
3017     release_value(v);
3018     free(v);
3019     store_error(px);
3020     return -1;
3021 }
3022
3023 int pathx_symtab_undefine(struct pathx_symtab **symtab, const char *name) {
3024     struct pathx_symtab *del = NULL;
3025
3026     for(del = *symtab;
3027         del != NULL && !STREQ(del->name, name);
3028         del = del->next);
3029     if (del == NULL)
3030         return 0;
3031     list_remove(del, *symtab);
3032     free_symtab(del);
3033     return 0;
3034 }
3035
3036 int pathx_symtab_assign_tree(struct pathx_symtab **symtab,
3037                              const char *name, struct tree *tree) {
3038     struct value *v = NULL;
3039     int r;
3040
3041     if (ALLOC(v) < 0)
3042         goto error;
3043
3044     v->tag = T_NODESET;
3045     if (ALLOC(v->nodeset) < 0)
3046         goto error;
3047     if (ALLOC_N(v->nodeset->nodes, 1) < 0)
3048         goto error;
3049     v->nodeset->used = 1;
3050     v->nodeset->size = 1;
3051     v->nodeset->nodes[0] = tree;
3052
3053     r = pathx_symtab_set(symtab, name, v);
3054     if (r < 0)
3055         goto error;
3056     return 1;
3057  error:
3058     release_value(v);
3059     free(v);
3060     return -1;
3061 }
3062
3063 int
3064 pathx_symtab_count(const struct pathx_symtab *symtab, const char *name) {
3065     struct value *v = lookup_var(name, symtab);
3066
3067     if (v == NULL || v->tag != T_NODESET)
3068         return -1;
3069
3070     return v->nodeset->used;
3071 }
3072
3073 struct tree *
3074 pathx_symtab_get_tree(struct pathx_symtab *symtab,
3075                       const char *name, int i) {
3076     struct value *v = lookup_var(name, symtab);
3077     if (v == NULL)
3078         return NULL;
3079     if (v->tag != T_NODESET)
3080         return NULL;
3081     if (i >= v->nodeset->used)
3082         return NULL;
3083     return v->nodeset->nodes[i];
3084 }
3085
3086 void pathx_symtab_remove_descendants(struct pathx_symtab *symtab,
3087                                      const struct tree *tree) {
3088     list_for_each(tab, symtab) {
3089         if (tab->value->tag != T_NODESET)
3090             continue;
3091         struct nodeset *ns = tab->value->nodeset;
3092         for (int i=0; i < ns->used;) {
3093             struct tree *t = ns->nodes[i];
3094             while (t != t->parent && t != tree)
3095                 t = t->parent;
3096             if (t == tree)
3097                 ns_remove(ns, i, 1);
3098             else
3099                 i += 1;
3100         }
3101     }
3102 }
3103
3104 /*
3105  * Local variables:
3106  *  indent-tabs-mode: nil
3107  *  c-indent-level: 4
3108  *  c-basic-offset: 4
3109  *  tab-width: 4
3110  * End:
3111  */