src/pathx.c

   1 /*
   2  * pathx.c: handling path expressions
   3  *
   4  * Copyright (C) 2007-2015 David Lutterkort
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
  19  *
  20  * Author: David Lutterkort <dlutter@redhat.com>
  21  */
  22
  23 #include <config.h>
  24 #include <internal.h>
  25 #include <stdint.h>
  26 #include <stdbool.h>
  27 #include <memory.h>
  28 #include <ctype.h>
  29
  30 #include "ref.h"
  31 #include "regexp.h"
  32 #include "errcode.h"
  33
  34 static const char *const errcodes[] = {
  35     "no error",
  36     "empty name",
  37     "illegal string literal",
  38     "illegal number",
  39     "string missing ending ' or \"",
  40     "expected '='",
  41     "allocation failed",
  42     "unmatched '['",
  43     "unmatched '('",
  44     "expected a '/'",
  45     "internal error",                             /* PATHX_EINTERNAL */
  46     "type error",                                 /* PATHX_ETYPE */
  47     "undefined variable",                         /* PATHX_ENOVAR */
  48     "garbage at end of path expression",          /* PATHX_EEND */
  49     "no match for path expression",               /* PATHX_ENOMATCH */
  50     "wrong number of arguments in function call", /* PATHX_EARITY */
  51     "invalid regular expression",                 /* PATHX_EREGEXP */
  52     "too many matches",                           /* PATHX_EMMATCH */
  53     "wrong flag for regexp"                       /* PATHX_EREGEXPFLAG */
  54 };
  55
  56 /*
  57  * Path expressions are strings that use a notation modelled on XPath.
  58  */
  59
  60 enum type {
  61     T_NONE = 0,     /* Not a type */
  62     T_NODESET,
  63     T_BOOLEAN,
  64     T_NUMBER,
  65     T_STRING,
  66     T_REGEXP
  67 };
  68
  69 enum expr_tag {
  70     E_FILTER,
  71     E_BINARY,
  72     E_VALUE,
  73     E_VAR,
  74     E_APP
  75 };
  76
  77 enum binary_op {
  78     OP_EQ,         /* '='  */
  79     OP_NEQ,        /* '!=' */
  80     OP_LT,         /* '<'  */
  81     OP_LE,         /* '<=' */
  82     OP_GT,         /* '>'  */
  83     OP_GE,         /* '>=' */
  84     OP_PLUS,       /* '+'  */
  85     OP_MINUS,      /* '-'  */
  86     OP_STAR,       /* '*'  */
  87     OP_AND,        /* 'and' */
  88     OP_OR,         /* 'or' */
  89     OP_RE_MATCH,   /* '=~' */
  90     OP_RE_NOMATCH, /* '!~' */
  91     OP_UNION       /* '|' */
  92 };
  93
  94 struct pred {
  95     int               nexpr;
  96     struct expr     **exprs;
  97 };
  98
  99 enum axis {
 100     SELF,
 101     CHILD,
 102     DESCENDANT,
 103     DESCENDANT_OR_SELF,
 104     PARENT,
 105     ANCESTOR,
 106     ROOT,
 107     PRECEDING_SIBLING,
 108     FOLLOWING_SIBLING
 109 };
 110
 111 /* This array is indexed by enum axis */
 112 static const char *const axis_names[] = {
 113     "self",
 114     "child",
 115     "descendant",
 116     "descendant-or-self",
 117     "parent",
 118     "ancestor",
 119     "root",
 120     "preceding-sibling",
 121     "following-sibling"
 122 };
 123
 124 static const char *const axis_sep = "::";
 125
 126 /* The characters that can follow a name in a location expression (aka path)
 127  * The parser will assume that name (path component) is finished when it
 128  * encounters any of these characters, unless they are escaped by preceding
 129  * them with a '\\'.
 130  *
 131  * See parse_name for the gory details
 132  */
 133 static const char const name_follow[] = "][|/=()!,";
 134
 135 /* Doubly linked list of location steps. Besides the information from the
 136  * path expression, also contains information to iterate over a node set,
 137  * in particular, the context node CTX for the step, and the current node
 138  * CUR within that context.
 139  */
 140 struct step {
 141     struct step *next;
 142     enum axis    axis;
 143     char        *name;              /* NULL to match any name */
 144     struct pred *predicates;
 145 };
 146
 147 /* Initialise the root nodeset with the first step */
 148 static struct tree *step_root(struct step *step, struct tree *ctx,
 149                               struct tree *root_ctx);
 150 /* Iteration over the nodes on a step, ignoring the predicates */
 151 static struct tree *step_first(struct step *step, struct tree *ctx);
 152 static struct tree *step_next(struct step *step, struct tree *ctx,
 153                               struct tree *node);
 154
 155 struct pathx_symtab {
 156     struct pathx_symtab *next;
 157     char                *name;
 158     struct value        *value;
 159 };
 160
 161 struct pathx {
 162     struct state   *state;
 163     struct nodeset *nodeset;
 164     int             node;
 165     struct tree    *origin;
 166 };
 167
 168 #define L_BRACK '['
 169 #define R_BRACK ']'
 170
 171 struct locpath {
 172     struct step *steps;
 173 };
 174
 175 struct nodeset {
 176     struct tree **nodes;
 177     size_t        used;
 178     size_t        size;
 179 };
 180
 181 typedef uint32_t value_ind_t;
 182
 183 struct value {
 184     enum type tag;
 185     union {
 186         struct nodeset  *nodeset;     /* T_NODESET */
 187         int              number;      /* T_NUMBER  */
 188         char            *string;      /* T_STRING  */
 189         bool             boolval;     /* T_BOOLEAN */
 190         struct regexp   *regexp;      /* T_REGEXP  */
 191     };
 192 };
 193
 194 struct expr {
 195     enum expr_tag tag;
 196     enum type     type;
 197     union {
 198         struct {                       /* E_FILTER */
 199             struct expr     *primary;
 200             struct pred     *predicates;
 201             struct locpath  *locpath;
 202         };
 203         struct {                       /* E_BINARY */
 204             enum binary_op op;
 205             struct expr *left;
 206             struct expr *right;
 207         };
 208         value_ind_t      value_ind;    /* E_VALUE */
 209         char            *ident;        /* E_VAR */
 210         struct {                       /* E_APP */
 211             const struct func *func;
 212             struct expr       **args;
 213         };
 214     };
 215 };
 216
 217 struct locpath_trace {
 218     unsigned int      maxns;
 219     struct nodeset  **ns;
 220     struct locpath   *lp;
 221 };
 222
 223 /* Internal state of the evaluator/parser */
 224 struct state {
 225     pathx_errcode_t errcode;
 226     const char     *file;
 227     int             line;
 228     char           *errmsg;
 229
 230     const char     *txt;  /* Entire expression */
 231     const char     *pos;  /* Current position within TXT during parsing */
 232
 233     struct tree *ctx; /* The current node */
 234     uint            ctx_pos;
 235     uint            ctx_len;
 236
 237     struct tree *root_ctx; /* Root context for relative paths */
 238
 239     /* A table of all values. The table is dynamically reallocated, i.e.
 240      * pointers to struct value should not be used across calls that
 241      * might allocate new values
 242      *
 243      * value_pool[0] is always the boolean false, and value_pool[1]
 244      * always the boolean true
 245      */
 246     struct value  *value_pool;
 247     value_ind_t    value_pool_used;
 248     value_ind_t    value_pool_size;
 249     /* Stack of values (as indices into value_pool), with bottom of
 250        stack in values[0] */
 251     value_ind_t   *values;
 252     size_t         values_used;
 253     size_t         values_size;
 254     /* Stack of expressions, with bottom of stack in exprs[0] */
 255     struct expr  **exprs;
 256     size_t         exprs_used;
 257     size_t         exprs_size;
 258     /* Trace of a locpath evaluation, needed by pathx_expand_tree.
 259        Generally NULL, unless a trace is needed.
 260      */
 261     struct locpath_trace *locpath_trace;
 262     /* Symbol table for variable lookups */
 263     struct pathx_symtab *symtab;
 264     /* Error structure, used to communicate errors to struct augeas;
 265      * we never own this structure, and therefore never free it */
 266     struct error        *error;
 267 };
 268
 269 /* We consider NULL and the empty string to be equal */
 270 ATTRIBUTE_PURE
 271 static inline int streqx(const char *s1, const char *s2) {
 272     if (s1 == NULL)
 273         return (s2 == NULL || strlen(s2) == 0);
 274     if (s2 == NULL)
 275         return strlen(s1) == 0;
 276     return STREQ(s1, s2);
 277 }
 278
 279 /* Functions */
 280
 281 typedef void (*func_impl_t)(struct state *state, int nargs);
 282
 283 struct func {
 284     const char      *name;
 285     unsigned int     arity;
 286     enum type        type;
 287     const enum type *arg_types;
 288     func_impl_t      impl;
 289 };
 290
 291 static void func_last(struct state *state, int nargs);
 292 static void func_position(struct state *state, int nargs);
 293 static void func_count(struct state *state, int nargs);
 294 static void func_label(struct state *state, int nargs);
 295 static void func_regexp(struct state *state, int nargs);
 296 static void func_regexp_flag(struct state *state, int nargs);
 297 static void func_glob(struct state *state, int nargs);
 298 static void func_int(struct state *state, int nargs);
 299
 300 static const enum type const arg_types_nodeset[] = { T_NODESET };
 301 static const enum type const arg_types_string[] = { T_STRING };
 302 static const enum type const arg_types_bool[] = { T_BOOLEAN };
 303 static const enum type const arg_types_string_string[] = { T_STRING, T_STRING };
 304 static const enum type const arg_types_nodeset_string[] = { T_NODESET, T_STRING };
 305
 306 static const struct func builtin_funcs[] = {
 307     { .name = "last", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
 308       .impl = func_last },
 309     { .name = "position", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
 310       .impl = func_position },
 311     { .name = "label", .arity = 0, .type = T_STRING, .arg_types = NULL,
 312       .impl = func_label },
 313     { .name = "count", .arity = 1, .type = T_NUMBER,
 314       .arg_types = arg_types_nodeset,
 315       .impl = func_count },
 316     { .name = "regexp", .arity = 1, .type = T_REGEXP,
 317       .arg_types = arg_types_string,
 318       .impl = func_regexp },
 319     { .name = "regexp", .arity = 1, .type = T_REGEXP,
 320       .arg_types = arg_types_nodeset,
 321       .impl = func_regexp },
 322     { .name = "regexp", .arity = 2, .type = T_REGEXP,
 323       .arg_types = arg_types_string_string,
 324       .impl = func_regexp_flag },
 325     { .name = "regexp", .arity = 2, .type = T_REGEXP,
 326       .arg_types = arg_types_nodeset_string,
 327       .impl = func_regexp_flag },
 328     { .name = "glob", .arity = 1, .type = T_REGEXP,
 329       .arg_types = arg_types_string,
 330       .impl = func_glob },
 331     { .name = "glob", .arity = 1, .type = T_REGEXP,
 332       .arg_types = arg_types_nodeset,
 333       .impl = func_glob },
 334     { .name = "int", .arity = 1, .type = T_NUMBER,
 335       .arg_types = arg_types_string, .impl = func_int },
 336     { .name = "int", .arity = 1, .type = T_NUMBER,
 337       .arg_types = arg_types_nodeset, .impl = func_int },
 338     { .name = "int", .arity = 1, .type = T_NUMBER,
 339       .arg_types = arg_types_bool, .impl = func_int }
 340 };
 341
 342 #define RET_ON_ERROR                                                    \
 343     if (state->errcode != PATHX_NOERROR) return
 344
 345 #define RET0_ON_ERROR                                                   \
 346     if (state->errcode != PATHX_NOERROR) return 0
 347
 348 #define STATE_ERROR(state, err)                                         \
 349     do {                                                                \
 350         state->errcode = err;                                           \
 351         state->file = __FILE__;                                         \
 352         state->line = __LINE__;                                         \
 353     } while (0)
 354
 355 #define HAS_ERROR(state) (state->errcode != PATHX_NOERROR)
 356
 357 #define STATE_ENOMEM STATE_ERROR(state, PATHX_ENOMEM)
 358
 359 #define ENOMEM_ON_NULL(state, v)                                        \
 360     do {                                                                \
 361         if (v == NULL) {                                                \
 362             STATE_ERROR(state, PATHX_ENOMEM);                           \
 363             return NULL;                                                \
 364         }                                                               \
 365     } while (0);
 366
 367 /*
 368  * Free the various data structures
 369  */
 370
 371 static void free_expr(struct expr *expr);
 372
 373 static void free_pred(struct pred *pred) {
 374     if (pred == NULL)
 375         return;
 376
 377     for (int i=0; i < pred->nexpr; i++) {
 378         free_expr(pred->exprs[i]);
 379     }
 380     free(pred->exprs);
 381     free(pred);
 382 }
 383
 384 static void free_step(struct step *step) {
 385     while (step != NULL) {
 386         struct step *del = step;
 387         step = del->next;
 388         free(del->name);
 389         free_pred(del->predicates);
 390         free(del);
 391     }
 392 }
 393
 394 static void free_locpath(struct locpath *locpath) {
 395     if (locpath == NULL)
 396         return;
 397     while (locpath->steps != NULL) {
 398         struct step *step = locpath->steps;
 399         locpath->steps = step->next;
 400         free(step->name);
 401         free_pred(step->predicates);
 402         free(step);
 403     }
 404     free(locpath);
 405 }
 406
 407 static void free_expr(struct expr *expr) {
 408     if (expr == NULL)
 409         return;
 410     switch (expr->tag) {
 411     case E_FILTER:
 412         free_expr(expr->primary);
 413         free_pred(expr->predicates);
 414         free_locpath(expr->locpath);
 415         break;
 416     case E_BINARY:
 417         free_expr(expr->left);
 418         free_expr(expr->right);
 419         break;
 420     case E_VALUE:
 421         break;
 422     case E_VAR:
 423         free(expr->ident);
 424         break;
 425     case E_APP:
 426         for (int i=0; i < expr->func->arity; i++)
 427             free_expr(expr->args[i]);
 428         free(expr->args);
 429         break;
 430     default:
 431         assert(0);
 432     }
 433     free(expr);
 434 }
 435
 436 static void free_nodeset(struct nodeset *ns) {
 437     if (ns != NULL) {
 438         free(ns->nodes);
 439         free(ns);
 440     }
 441 }
 442
 443 /* Free all objects used by VALUE, but not VALUE itself */
 444 static void release_value(struct value *v) {
 445     if (v == NULL)
 446         return;
 447
 448     switch (v->tag) {
 449     case T_NODESET:
 450         free_nodeset(v->nodeset);
 451         break;
 452     case T_STRING:
 453         free(v->string);
 454         break;
 455     case T_BOOLEAN:
 456     case T_NUMBER:
 457         break;
 458     case T_REGEXP:
 459         unref(v->regexp, regexp);
 460         break;
 461     default:
 462         assert(0);
 463     }
 464 }
 465
 466 static void free_state(struct state *state) {
 467     if (state == NULL)
 468         return;
 469
 470     for(int i=0; i < state->exprs_used; i++)
 471         free_expr(state->exprs[i]);
 472     free(state->exprs);
 473
 474     for(int i=0; i < state->value_pool_used; i++)
 475         release_value(state->value_pool + i);
 476     free(state->value_pool);
 477     free(state->values);
 478     free(state);
 479 }
 480
 481 void free_pathx(struct pathx *pathx) {
 482     if (pathx == NULL)
 483         return;
 484     free_state(pathx->state);
 485     free(pathx);
 486 }
 487
 488 /*
 489  * Nodeset helpers
 490  */
 491 static struct nodeset *make_nodeset(struct state *state) {
 492     struct nodeset *result;
 493     if (ALLOC(result) < 0)
 494         STATE_ENOMEM;
 495     return result;
 496 }
 497
 498 /* Add NODE to NS if it is not in NS yet. This relies on the flag
 499  * NODE->ADDED and care must be taken that NS_CLEAR_ADDED is called on NS
 500  * as soon as we are done adding nodes to it.
 501  */
 502 static void ns_add(struct nodeset *ns, struct tree *node,
 503                    struct state *state) {
 504     if (node->added)
 505         return;
 506     if (ns->used >= ns->size) {
 507         size_t size = 2 * ns->size;
 508         if (size < 10) size = 10;
 509         if (REALLOC_N(ns->nodes, size) < 0)
 510             STATE_ENOMEM;
 511         ns->size = size;
 512     }
 513     ns->nodes[ns->used] = node;
 514     node->added = 1;
 515     ns->used += 1;
 516 }
 517
 518 static void ns_clear_added(struct nodeset *ns) {
 519     for (int i=0; i < ns->used; i++)
 520         ns->nodes[i]->added = 0;
 521 }
 522
 523 static struct nodeset *
 524 clone_nodeset(struct nodeset *ns, struct state *state)
 525 {
 526     struct nodeset *clone;
 527     if (ALLOC(clone) < 0) {
 528         STATE_ENOMEM;
 529         return NULL;
 530     }
 531     if (ALLOC_N(clone->nodes, ns->used) < 0) {
 532         free(clone);
 533         STATE_ENOMEM;
 534         return NULL;
 535     }
 536     clone->used = ns->used;
 537     clone->size = ns->used;
 538     for (int i=0; i < ns->used; i++)
 539         clone->nodes[i] = ns->nodes[i];
 540     return clone;
 541 }
 542
 543 /*
 544  * Handling values
 545  */
 546 static value_ind_t make_value(enum type tag, struct state *state) {
 547     assert(tag != T_BOOLEAN);
 548
 549     if (state->value_pool_used >= state->value_pool_size) {
 550         value_ind_t new_size = 2*state->value_pool_size;
 551         if (new_size <= state->value_pool_size) {
 552             STATE_ENOMEM;
 553             return 0;
 554         }
 555         if (REALLOC_N(state->value_pool, new_size) < 0) {
 556             STATE_ENOMEM;
 557             return 0;
 558         }
 559         state->value_pool_size = new_size;
 560     }
 561     state->value_pool[state->value_pool_used].tag = tag;
 562     state->value_pool[state->value_pool_used].nodeset = NULL;
 563     return state->value_pool_used++;
 564 }
 565
 566 ATTRIBUTE_UNUSED
 567 static value_ind_t clone_value(struct value *v, struct state *state) {
 568     value_ind_t vind = make_value(v->tag, state);
 569     RET0_ON_ERROR;
 570     struct value *clone = state->value_pool + vind;
 571
 572     switch (v->tag) {
 573     case T_NODESET:
 574         clone->nodeset = clone_nodeset(v->nodeset, state);
 575         break;
 576     case T_NUMBER:
 577         clone->number = v->number;
 578         break;
 579     case T_STRING:
 580         clone->string = strdup(v->string);
 581         if (clone->string == NULL) {
 582             FREE(clone);
 583             STATE_ENOMEM;
 584         }
 585         break;
 586     case T_BOOLEAN:
 587         clone->boolval = v->boolval;
 588         break;
 589     case T_REGEXP:
 590         clone->regexp = ref(v->regexp);
 591         break;
 592     default:
 593         assert(0);
 594     }
 595     return vind;
 596 }
 597
 598 static value_ind_t pop_value_ind(struct state *state) {
 599     if (state->values_used > 0) {
 600         state->values_used -= 1;
 601         return state->values[state->values_used];
 602     } else {
 603         STATE_ERROR(state, PATHX_EINTERNAL);
 604         assert(0);
 605         return 0;
 606     }
 607 }
 608
 609 static struct value *pop_value(struct state *state) {
 610     value_ind_t vind = pop_value_ind(state);
 611     if (HAS_ERROR(state))
 612         return NULL;
 613     return state->value_pool + vind;
 614 }
 615
 616 static void push_value(value_ind_t vind, struct state *state) {
 617     if (state->values_used >= state->values_size) {
 618         size_t new_size = 2*state->values_size;
 619         if (new_size == 0) new_size = 8;
 620         if (REALLOC_N(state->values, new_size) < 0) {
 621             STATE_ENOMEM;
 622             return;
 623         }
 624         state->values_size = new_size;
 625     }
 626     state->values[state->values_used++] = vind;
 627 }
 628
 629 static void push_boolean_value(int b, struct state *state) {
 630     push_value(b != 0, state);
 631 }
 632
 633 ATTRIBUTE_PURE
 634 static struct value *expr_value(struct expr *expr, struct state *state) {
 635     return state->value_pool + expr->value_ind;
 636 }
 637
 638 /*************************************************************************
 639  * Evaluation
 640  ************************************************************************/
 641 static void eval_expr(struct expr *expr, struct state *state);
 642
 643
 644 #define ensure_arity(min, max)                                          \
 645     if (nargs < min || nargs > max) {                                   \
 646         STATE_ERROR(state, PATHX_EINTERNAL);                            \
 647         return;                                                         \
 648     }
 649
 650 static void func_last(struct state *state, int nargs) {
 651     ensure_arity(0, 0);
 652     value_ind_t vind = make_value(T_NUMBER, state);
 653     RET_ON_ERROR;
 654
 655     state->value_pool[vind].number = state->ctx_len;
 656     push_value(vind, state);
 657 }
 658
 659 static void func_position(struct state *state, int nargs) {
 660     ensure_arity(0, 0);
 661     value_ind_t vind = make_value(T_NUMBER, state);
 662     RET_ON_ERROR;
 663
 664     state->value_pool[vind].number = state->ctx_pos;
 665     push_value(vind, state);
 666 }
 667
 668 static void func_count(struct state *state, int nargs) {
 669     ensure_arity(1, 1);
 670     value_ind_t vind = make_value(T_NUMBER, state);
 671     RET_ON_ERROR;
 672
 673     struct value *ns = pop_value(state);
 674     state->value_pool[vind].number = ns->nodeset->used;
 675     push_value(vind, state);
 676 }
 677
 678 static void func_label(struct state *state, int nargs) {
 679     ensure_arity(0, 0);
 680     value_ind_t vind = make_value(T_STRING, state);
 681     char *s;
 682
 683     RET_ON_ERROR;
 684
 685     if (state->ctx->label)
 686         s = strdup(state->ctx->label);
 687     else
 688         s = strdup("");
 689     if (s == NULL) {
 690         STATE_ENOMEM;
 691         return;
 692     }
 693     state->value_pool[vind].string = s;
 694     push_value(vind, state);
 695 }
 696
 697 static void func_int(struct state *state, int nargs) {
 698     ensure_arity(1, 1);
 699     value_ind_t vind = make_value(T_NUMBER, state);
 700     int64_t i = -1;
 701     RET_ON_ERROR;
 702
 703     struct value *v = pop_value(state);
 704     if (v->tag == T_BOOLEAN) {
 705         i = v->boolval;
 706     } else {
 707         const char *s = NULL;
 708         if (v->tag == T_STRING) {
 709             s = v->string;
 710         } else {
 711             /* T_NODESET */
 712             if (v->nodeset->used != 1) {
 713                 STATE_ERROR(state, PATHX_EMMATCH);
 714                 return;
 715             }
 716             s = v->nodeset->nodes[0]->value;
 717         }
 718         if (s != NULL) {
 719             int r;
 720             r = xstrtoint64(s, 10, &i);
 721             if (r < 0) {
 722                 STATE_ERROR(state, PATHX_ENUMBER);
 723                 return;
 724             }
 725         }
 726     }
 727     state->value_pool[vind].number = i;
 728     push_value(vind, state);
 729 }
 730
 731 static struct regexp *
 732 nodeset_as_regexp(struct info *info, struct nodeset *ns, int glob, int nocase) {
 733     struct regexp *result = NULL;
 734     struct regexp **rx = NULL;
 735     int used = 0;
 736
 737     for (int i = 0; i < ns->used; i++) {
 738         if (ns->nodes[i]->value != NULL)
 739             used += 1;
 740     }
 741
 742     if (used == 0) {
 743         /* If the nodeset is empty, make sure we produce a regexp
 744          * that never matches anything */
 745         result = make_regexp_unescape(info, "[^\001-\7ff]", nocase);
 746     } else {
 747         if (ALLOC_N(rx, ns->used) < 0)
 748             goto error;
 749         for (int i=0; i < ns->used; i++) {
 750             if (ns->nodes[i]->value == NULL)
 751                 continue;
 752
 753             if (glob)
 754                 rx[i] = make_regexp_from_glob(info, ns->nodes[i]->value);
 755             else
 756                 rx[i] = make_regexp_unescape(info, ns->nodes[i]->value, 0);
 757             if (rx[i] == NULL)
 758                 goto error;
 759         }
 760         result = regexp_union_n(info, ns->used, rx);
 761     }
 762
 763  error:
 764     if (rx != NULL) {
 765         for (int i=0; i < ns->used; i++)
 766             unref(rx[i], regexp);
 767         free(rx);
 768     }
 769     return result;
 770 }
 771
 772 static void func_regexp_or_glob(struct state *state, int glob, int nocase) {
 773     value_ind_t vind = make_value(T_REGEXP, state);
 774     int r;
 775
 776     RET_ON_ERROR;
 777
 778     struct value *v = pop_value(state);
 779     struct regexp *rx = NULL;
 780
 781     if (v->tag == T_STRING) {
 782         if (glob)
 783             rx = make_regexp_from_glob(state->error->info, v->string);
 784         else
 785             rx = make_regexp_unescape(state->error->info, v->string, nocase);
 786     } else if (v->tag == T_NODESET) {
 787         rx = nodeset_as_regexp(state->error->info, v->nodeset, glob, nocase);
 788     } else {
 789         assert(0);
 790     }
 791
 792     if (rx == NULL) {
 793         STATE_ENOMEM;
 794         return;
 795     }
 796
 797     state->value_pool[vind].regexp = rx;
 798     r = regexp_compile(rx);
 799     if (r < 0) {
 800         const char *msg;
 801         regexp_check(rx, &msg);
 802         state->errmsg = strdup(msg);
 803         STATE_ERROR(state, PATHX_EREGEXP);
 804         return;
 805     }
 806     push_value(vind, state);
 807 }
 808
 809 static void func_regexp(struct state *state, int nargs) {
 810     ensure_arity(1, 1);
 811     func_regexp_or_glob(state, 0, 0);
 812 }
 813
 814 static void func_regexp_flag(struct state *state, int nargs) {
 815     ensure_arity(2, 2);
 816     int nocase = 0;
 817     struct value *f = pop_value(state);
 818
 819     if (STREQ("i", f->string))
 820         nocase = 1;
 821     else
 822         STATE_ERROR(state, PATHX_EREGEXPFLAG);
 823
 824     func_regexp_or_glob(state, 0, nocase);
 825 }
 826
 827 static void func_glob(struct state *state, int nargs) {
 828     ensure_arity(1, 1);
 829     func_regexp_or_glob(state, 1, 0);
 830 }
 831
 832 static bool coerce_to_bool(struct value *v) {
 833     switch (v->tag) {
 834     case T_NODESET:
 835         return v->nodeset->used > 0;
 836         break;
 837     case T_BOOLEAN:
 838         return v->boolval;
 839         break;
 840     case T_NUMBER:
 841         return v->number > 0;
 842         break;
 843     case T_STRING:
 844         return strlen(v->string) > 0;
 845         break;
 846     case T_REGEXP:
 847         return true;
 848     default:
 849         assert(0);
 850         return false;
 851     }
 852 }
 853
 854 static int calc_eq_nodeset_nodeset(struct nodeset *ns1, struct nodeset *ns2,
 855                                    int neq) {
 856     for (int i1=0; i1 < ns1->used; i1++) {
 857         struct tree *t1 = ns1->nodes[i1];
 858         for (int i2=0; i2 < ns2->used; i2++) {
 859             struct tree *t2 = ns2->nodes[i2];
 860             if (neq) {
 861                 if (!streqx(t1->value, t2->value))
 862                     return 1;
 863             } else {
 864                 if (streqx(t1->value, t2->value))
 865                     return 1;
 866             }
 867         }
 868     }
 869     return 0;
 870 }
 871
 872 static int calc_eq_nodeset_string(struct nodeset *ns, const char *s,
 873                                   int neq) {
 874     for (int i=0; i < ns->used; i++) {
 875         struct tree *t = ns->nodes[i];
 876         if (neq) {
 877             if (!streqx(t->value, s))
 878                 return 1;
 879         } else {
 880             if (streqx(t->value, s))
 881                 return 1;
 882         }
 883     }
 884     return 0;
 885 }
 886
 887 static void eval_eq(struct state *state, int neq) {
 888     struct value *r = pop_value(state);
 889     struct value *l = pop_value(state);
 890     int res;
 891
 892     if (l->tag == T_NODESET && r->tag == T_NODESET) {
 893         res = calc_eq_nodeset_nodeset(l->nodeset, r->nodeset, neq);
 894     } else if (l->tag == T_NODESET) {
 895         res = calc_eq_nodeset_string(l->nodeset, r->string, neq);
 896     } else if (r->tag == T_NODESET) {
 897         res = calc_eq_nodeset_string(r->nodeset, l->string, neq);
 898     } else if (l->tag == T_NUMBER && r->tag == T_NUMBER) {
 899         if (neq)
 900             res = (l->number != r->number);
 901         else
 902             res = (l->number == r->number);
 903     } else {
 904         assert(l->tag == T_STRING);
 905         assert(r->tag == T_STRING);
 906         res = streqx(l->string, r->string);
 907         if (neq)
 908             res = !res;
 909     }
 910     RET_ON_ERROR;
 911
 912     push_boolean_value(res, state);
 913 }
 914
 915 static void eval_arith(struct state *state, enum binary_op op) {
 916     value_ind_t vind = make_value(T_NUMBER, state);
 917     struct value *r = pop_value(state);
 918     struct value *l = pop_value(state);
 919     int res;
 920
 921     assert(l->tag == T_NUMBER);
 922     assert(r->tag == T_NUMBER);
 923
 924     RET_ON_ERROR;
 925
 926     if (op == OP_PLUS)
 927         res = l->number + r->number;
 928     else if (op == OP_MINUS)
 929         res = l->number - r->number;
 930     else if (op == OP_STAR)
 931         res = l->number * r->number;
 932     else
 933         assert(0);
 934
 935     state->value_pool[vind].number = res;
 936     push_value(vind, state);
 937 }
 938
 939 static void eval_rel(struct state *state, bool greater, bool equal) {
 940     struct value *r, *l;
 941     int res;
 942
 943     /* We always check l < r or l <= r */
 944     if (greater) {
 945         l = pop_value(state);
 946         r = pop_value(state);
 947     } else {
 948         r = pop_value(state);
 949         l = pop_value(state);
 950     }
 951     if (l->tag == T_NUMBER) {
 952         if (equal)
 953             res = (l->number <= r->number);
 954         else
 955             res = (l->number < r->number);
 956     } else if (l->tag == T_STRING) {
 957         int cmp = strcmp(l->string, r->string);
 958         if (equal)
 959             res = cmp <= 0;
 960         else
 961             res = cmp < 0;
 962     } else {
 963         assert(0);
 964     }
 965
 966     push_boolean_value(res, state);
 967 }
 968
 969 static void eval_and_or(struct state *state, enum binary_op op) {
 970     struct value *r = pop_value(state);
 971     struct value *l = pop_value(state);
 972     bool left = coerce_to_bool(l);
 973     bool right = coerce_to_bool(r);
 974
 975
 976     if (op == OP_AND)
 977         push_boolean_value(left && right, state);
 978     else
 979         push_boolean_value(left || right, state);
 980 }
 981
 982 static bool eval_re_match_str(struct state *state, struct regexp *rx,
 983                               const char *str) {
 984     int r;
 985
 986     if (str == NULL)
 987         str = "";
 988
 989     r = regexp_match(rx, str, strlen(str), 0, NULL);
 990     if (r == -2) {
 991         STATE_ERROR(state, PATHX_EINTERNAL);
 992     } else if (r == -3) {
 993         /* We should never get this far; func_regexp should catch
 994          * invalid regexps */
 995         assert(false);
 996     }
 997     return r == strlen(str);
 998 }
 999
1000 static void eval_union(struct state *state) {
1001     value_ind_t vind = make_value(T_NODESET, state);
1002     struct value *r = pop_value(state);
1003     struct value *l = pop_value(state);
1004     struct nodeset *res = NULL;
1005
1006     assert(l->tag == T_NODESET);
1007     assert(r->tag == T_NODESET);
1008
1009     RET_ON_ERROR;
1010
1011     res = clone_nodeset(l->nodeset, state);
1012     RET_ON_ERROR;
1013     for (int i=0; i < r->nodeset->used; i++) {
1014         ns_add(res, r->nodeset->nodes[i], state);
1015         if (HAS_ERROR(state))
1016             goto error;
1017     }
1018     state->value_pool[vind].nodeset = res;
1019     push_value(vind, state);
1020  error:
1021     ns_clear_added(res);
1022 }
1023
1024 static void eval_concat_string(struct state *state) {
1025     value_ind_t vind = make_value(T_STRING, state);
1026     struct value *r = pop_value(state);
1027     struct value *l = pop_value(state);
1028     char *res = NULL;
1029
1030     RET_ON_ERROR;
1031
1032     if (ALLOC_N(res, strlen(l->string) + strlen(r->string) + 1) < 0) {
1033         STATE_ENOMEM;
1034         return;
1035     }
1036     strcpy(res, l->string);
1037     strcat(res, r->string);
1038     state->value_pool[vind].string = res;
1039     push_value(vind, state);
1040 }
1041
1042 static void eval_concat_regexp(struct state *state) {
1043     value_ind_t vind = make_value(T_REGEXP, state);
1044     struct value *r = pop_value(state);
1045     struct value *l = pop_value(state);
1046     struct regexp *rx = NULL;
1047
1048     RET_ON_ERROR;
1049
1050     rx = regexp_concat(state->error->info, l->regexp, r->regexp);
1051     if (rx == NULL) {
1052         STATE_ENOMEM;
1053         return;
1054     }
1055
1056     state->value_pool[vind].regexp = rx;
1057     push_value(vind, state);
1058 }
1059
1060 static void eval_re_match(struct state *state, enum binary_op op) {
1061     struct value *rx  = pop_value(state);
1062     struct value *v = pop_value(state);
1063
1064     bool result = false;
1065
1066     if (v->tag == T_STRING) {
1067         result = eval_re_match_str(state, rx->regexp, v->string);
1068         RET_ON_ERROR;
1069     } else if (v->tag == T_NODESET) {
1070         for (int i=0; i < v->nodeset->used && result == false; i++) {
1071             struct tree *t = v->nodeset->nodes[i];
1072             result = eval_re_match_str(state, rx->regexp, t->value);
1073             RET_ON_ERROR;
1074         }
1075     }
1076     if (op == OP_RE_NOMATCH)
1077         result = !result;
1078     push_boolean_value(result, state);
1079 }
1080
1081 static void eval_binary(struct expr *expr, struct state *state) {
1082     eval_expr(expr->left, state);
1083     eval_expr(expr->right, state);
1084     RET_ON_ERROR;
1085
1086     switch (expr->op) {
1087     case OP_EQ:
1088         eval_eq(state, 0);
1089         break;
1090     case OP_NEQ:
1091         eval_eq(state, 1);
1092         break;
1093     case OP_LT:
1094         eval_rel(state, false, false);
1095         break;
1096     case OP_LE:
1097         eval_rel(state, false, true);
1098         break;
1099     case OP_GT:
1100         eval_rel(state, true, false);
1101         break;
1102     case OP_GE:
1103         eval_rel(state, true, true);
1104         break;
1105     case OP_PLUS:
1106         if (expr->type == T_NUMBER)
1107             eval_arith(state, expr->op);
1108         else if (expr->type == T_STRING)
1109             eval_concat_string(state);
1110         else if (expr->type == T_REGEXP)
1111             eval_concat_regexp(state);
1112         break;
1113     case OP_MINUS:
1114     case OP_STAR:
1115         eval_arith(state, expr->op);
1116         break;
1117     case OP_AND:
1118     case OP_OR:
1119         eval_and_or(state, expr->op);
1120         break;
1121     case OP_UNION:
1122         eval_union(state);
1123         break;
1124     case OP_RE_MATCH:
1125     case OP_RE_NOMATCH:
1126         eval_re_match(state, expr->op);
1127         break;
1128     default:
1129         assert(0);
1130     }
1131 }
1132
1133 static void eval_app(struct expr *expr, struct state *state) {
1134     assert(expr->tag == E_APP);
1135
1136     for (int i=0; i < expr->func->arity; i++) {
1137         eval_expr(expr->args[i], state);
1138         RET_ON_ERROR;
1139     }
1140     expr->func->impl(state, expr->func->arity);
1141 }
1142
1143 static bool eval_pred(struct expr *expr, struct state *state) {
1144     eval_expr(expr, state);
1145     RET0_ON_ERROR;
1146
1147     struct value *v = pop_value(state);
1148     switch(v->tag) {
1149     case T_BOOLEAN:
1150         return v->boolval;
1151     case T_NUMBER:
1152         return (state->ctx_pos == v->number);
1153     case T_NODESET:
1154         return v->nodeset->used > 0;
1155     default:
1156         assert(0);
1157         return false;
1158     }
1159 }
1160
1161 /* Remove COUNT successive entries from NS. The first entry to remove is at
1162    IND */
1163 static void ns_remove(struct nodeset *ns, int ind, int count) {
1164     if (count < 1)
1165         return;
1166     memmove(ns->nodes + ind, ns->nodes + ind+count,
1167             sizeof(ns->nodes[0]) * (ns->used - (ind+count)));
1168     ns->used -= count;
1169 }
1170
1171 /*
1172  * Remove all nodes from NS for which one of PRED is false
1173  */
1174 static void ns_filter(struct nodeset *ns, struct pred *predicates,
1175                       struct state *state) {
1176     if (predicates == NULL)
1177         return;
1178
1179     struct tree *old_ctx = state->ctx;
1180     uint old_ctx_len = state->ctx_len;
1181     uint old_ctx_pos = state->ctx_pos;
1182
1183     for (int p=0; p < predicates->nexpr; p++) {
1184         int first_bad = -1;  /* The index of the first non-matching node */
1185         state->ctx_len = ns->used;
1186         state->ctx_pos = 1;
1187         for (int i=0; i < ns->used; state->ctx_pos++) {
1188             state->ctx = ns->nodes[i];
1189             bool match = eval_pred(predicates->exprs[p], state);
1190             RET_ON_ERROR;
1191             /* We remove non-matching nodes from NS in batches; this logic
1192              * makes sure that we only call ns_remove at the end of a run
1193              * of non-matching nodes
1194              */
1195             if (match) {
1196                 if (first_bad >= 0) {
1197                     ns_remove(ns, first_bad, i - first_bad);
1198                     i = first_bad + 1;
1199                 } else {
1200                     i += 1;
1201                 }
1202                 first_bad = -1;
1203             } else {
1204                 if (first_bad == -1)
1205                     first_bad = i;
1206                 i += 1;
1207             }
1208         }
1209         if (first_bad >= 0) {
1210             ns_remove(ns, first_bad, ns->used - first_bad);
1211         }
1212     }
1213
1214     state->ctx = old_ctx;
1215     state->ctx_pos = old_ctx_pos;
1216     state->ctx_len = old_ctx_len;
1217 }
1218
1219 /* Return true if PRED is solely the predicate '[n]' as in 'foo[17]' */
1220 static bool position_pred(struct pred *pred) {
1221     return pred != NULL &&
1222         pred->nexpr == 1 &&
1223         pred->exprs[0]->tag == E_VALUE &&
1224         pred->exprs[0]->type == T_NUMBER;
1225 }
1226
1227 /* Return the tree node at the position implied by STEP->PREDICATES. It is
1228    assumed and required that STEP->PREDICATES is actually a
1229    POSITION_PRED.
1230
1231    This method hand-optimizes the important case of a path expression like
1232    'service[42]'
1233 */
1234 static struct tree *position_filter(struct nodeset *ns,
1235                                     struct step *step,
1236                                     struct state *state) {
1237     int value_ind = step->predicates->exprs[0]->value_ind;
1238     int number = state->value_pool[value_ind].number;
1239
1240     int pos = 1;
1241     for (int i=0; i < ns->used; i++) {
1242         for (struct tree *node = step_first(step, ns->nodes[i]);
1243              node != NULL;
1244              node = step_next(step, ns->nodes[i], node), pos++) {
1245             if (pos == number)
1246                 return node;
1247         }
1248     }
1249
1250     return NULL;
1251 }
1252
1253 /* Return an array of nodesets, one for each step in the locpath.
1254  *
1255  * On return, (*NS)[0] will contain state->ctx, and (*NS)[*MAXNS] will
1256  * contain the nodes that matched the entire locpath
1257  */
1258 static void ns_from_locpath(struct locpath *lp, uint *maxns,
1259                             struct nodeset ***ns,
1260                             const struct nodeset *root,
1261                             struct state *state) {
1262     struct tree *old_ctx = state->ctx;
1263
1264     *maxns = 0;
1265     *ns = NULL;
1266     list_for_each(step, lp->steps)
1267         *maxns += 1;
1268     if (ALLOC_N(*ns, *maxns+1) < 0) {
1269         STATE_ERROR(state, PATHX_ENOMEM);
1270         goto error;
1271     }
1272     for (int i=0; i <= *maxns; i++) {
1273         (*ns)[i] = make_nodeset(state);
1274         if (HAS_ERROR(state))
1275             goto error;
1276     }
1277
1278     if (root == NULL) {
1279         struct step *first_step = NULL;
1280         if (lp != NULL)
1281             first_step = lp->steps;
1282
1283         struct tree *root_tree;
1284         root_tree = step_root(first_step, state->ctx, state->root_ctx);
1285         ns_add((*ns)[0], root_tree, state);
1286         ns_clear_added((*ns)[0]);
1287     } else {
1288         for (int i=0; i < root->used; i++)
1289             ns_add((*ns)[0], root->nodes[i], state);
1290         ns_clear_added((*ns)[0]);
1291     }
1292
1293     if (HAS_ERROR(state))
1294         goto error;
1295
1296     uint cur_ns = 0;
1297     list_for_each(step, lp->steps) {
1298         struct nodeset *work = (*ns)[cur_ns];
1299         struct nodeset *next = (*ns)[cur_ns + 1];
1300         if (position_pred(step->predicates)) {
1301             struct tree *node = position_filter(work, step, state);
1302             if (node) {
1303                 ns_add(next, node, state);
1304                 ns_clear_added(next);
1305             }
1306         } else {
1307             for (int i=0; i < work->used; i++) {
1308                 for (struct tree *node = step_first(step, work->nodes[i]);
1309                      node != NULL;
1310                      node = step_next(step, work->nodes[i], node)) {
1311                     ns_add(next, node, state);
1312                 }
1313             }
1314             ns_clear_added(next);
1315             ns_filter(next, step->predicates, state);
1316             if (HAS_ERROR(state))
1317                 goto error;
1318         }
1319         cur_ns += 1;
1320     }
1321
1322     state->ctx = old_ctx;
1323     return;
1324  error:
1325     if (*ns != NULL) {
1326         for (int i=0; i <= *maxns; i++)
1327             free_nodeset((*ns)[i]);
1328         FREE(*ns);
1329     }
1330     state->ctx = old_ctx;
1331     return;
1332 }
1333
1334 static void eval_filter(struct expr *expr, struct state *state) {
1335     struct locpath *lp = expr->locpath;
1336     struct nodeset **ns = NULL;
1337     struct locpath_trace *lpt = state->locpath_trace;
1338     uint maxns;
1339
1340     state->locpath_trace = NULL;
1341     if (expr->primary == NULL) {
1342         ns_from_locpath(lp, &maxns, &ns, NULL, state);
1343     } else {
1344         eval_expr(expr->primary, state);
1345         RET_ON_ERROR;
1346         value_ind_t primary_ind = pop_value_ind(state);
1347         struct value *primary = state->value_pool + primary_ind;
1348         assert(primary->tag == T_NODESET);
1349         ns_filter(primary->nodeset, expr->predicates, state);
1350         /* Evaluating predicates might have reallocated the value_pool */
1351         primary = state->value_pool + primary_ind;
1352         ns_from_locpath(lp, &maxns, &ns, primary->nodeset, state);
1353     }
1354     RET_ON_ERROR;
1355
1356     value_ind_t vind = make_value(T_NODESET, state);
1357     RET_ON_ERROR;
1358     state->value_pool[vind].nodeset = ns[maxns];
1359     push_value(vind, state);
1360
1361     if (lpt != NULL) {
1362         assert(lpt->ns == NULL);
1363         assert(lpt->lp == NULL);
1364         lpt->maxns = maxns;
1365         lpt->ns = ns;
1366         lpt->lp = lp;
1367         state->locpath_trace = lpt;
1368     } else {
1369         for (int i=0; i < maxns; i++)
1370             free_nodeset(ns[i]);
1371         FREE(ns);
1372     }
1373 }
1374
1375 static struct value *lookup_var(const char *ident, struct state *state) {
1376     list_for_each(tab, state->symtab) {
1377         if (STREQ(ident, tab->name))
1378             return tab->value;
1379     }
1380     return NULL;
1381 }
1382
1383 static void eval_var(struct expr *expr, struct state *state) {
1384     struct value *v = lookup_var(expr->ident, state);
1385     value_ind_t vind = clone_value(v, state);
1386     RET_ON_ERROR;
1387     push_value(vind, state);
1388 }
1389
1390 static void eval_expr(struct expr *expr, struct state *state) {
1391     RET_ON_ERROR;
1392     switch (expr->tag) {
1393     case E_FILTER:
1394         eval_filter(expr, state);
1395         break;
1396     case E_BINARY:
1397         eval_binary(expr, state);
1398         break;
1399     case E_VALUE:
1400         push_value(expr->value_ind, state);
1401         break;
1402     case E_VAR:
1403         eval_var(expr, state);
1404         break;
1405     case E_APP:
1406         eval_app(expr, state);
1407         break;
1408     default:
1409         assert(0);
1410     }
1411 }
1412
1413 /*************************************************************************
1414  * Typechecker
1415  *************************************************************************/
1416
1417 static void check_expr(struct expr *expr, struct state *state);
1418
1419 /* Typecheck a list of predicates. A predicate is a function of
1420  * one of the following types:
1421  *
1422  * T_NODESET -> T_BOOLEAN
1423  * T_NUMBER  -> T_BOOLEAN  (position test)
1424  * T_BOOLEAN -> T_BOOLEAN
1425  */
1426 static void check_preds(struct pred *pred, struct state *state) {
1427     if (pred == NULL)
1428         return;
1429     for (int i=0; i < pred->nexpr; i++) {
1430         struct expr *e = pred->exprs[i];
1431         check_expr(e, state);
1432         RET_ON_ERROR;
1433         if (e->type != T_NODESET && e->type != T_NUMBER &&
1434             e->type != T_BOOLEAN) {
1435             STATE_ERROR(state, PATHX_ETYPE);
1436             return;
1437         }
1438     }
1439 }
1440
1441 static void check_filter(struct expr *expr, struct state *state) {
1442     assert(expr->tag == E_FILTER);
1443     struct locpath *locpath = expr->locpath;
1444
1445     if (expr->primary != NULL) {
1446         check_expr(expr->primary, state);
1447         if (expr->primary->type != T_NODESET) {
1448             STATE_ERROR(state, PATHX_ETYPE);
1449             return;
1450         }
1451         check_preds(expr->predicates, state);
1452         RET_ON_ERROR;
1453     }
1454     list_for_each(s, locpath->steps) {
1455         check_preds(s->predicates, state);
1456         RET_ON_ERROR;
1457     }
1458     expr->type = T_NODESET;
1459 }
1460
1461 static void check_app(struct expr *expr, struct state *state) {
1462     assert(expr->tag == E_APP);
1463
1464     for (int i=0; i < expr->func->arity; i++) {
1465         check_expr(expr->args[i], state);
1466         RET_ON_ERROR;
1467     }
1468
1469     int f;
1470     for (f=0; f < ARRAY_CARDINALITY(builtin_funcs); f++) {
1471         const struct func *fn = builtin_funcs + f;
1472         if (STRNEQ(expr->func->name, fn->name))
1473             continue;
1474         if (expr->func->arity != fn->arity)
1475             continue;
1476
1477         int match = 1;
1478         for (int i=0; i < expr->func->arity; i++) {
1479             if (expr->args[i]->type != fn->arg_types[i]) {
1480                 match = 0;
1481                 break;
1482             }
1483         }
1484         if (match)
1485             break;
1486     }
1487
1488     if (f < ARRAY_CARDINALITY(builtin_funcs)) {
1489         expr->func = builtin_funcs + f;
1490         expr->type = expr->func->type;
1491     } else {
1492         STATE_ERROR(state, PATHX_ETYPE);
1493     }
1494 }
1495
1496 /* Check the binary operators. Type rules:
1497  *
1498  * '=', '!='  : T_NODESET -> T_NODESET -> T_BOOLEAN
1499  *              T_STRING  -> T_NODESET -> T_BOOLEAN
1500  *              T_NODESET -> T_STRING  -> T_BOOLEAN
1501  *              T_NUMBER  -> T_NUMBER  -> T_BOOLEAN
1502  *
1503  * '>', '>=',
1504  * '<', '<='  : T_NUMBER -> T_NUMBER -> T_BOOLEAN
1505  *              T_STRING -> T_STRING -> T_BOOLEAN
1506  * '+'        : T_NUMBER -> T_NUMBER -> T_NUMBER
1507  *              T_STRING -> T_STRING -> T_STRING
1508  *              T_REGEXP -> T_REGEXP -> T_REGEXP
1509  * '+', '-', '*': T_NUMBER -> T_NUMBER -> T_NUMBER
1510  *
1511  * 'and', 'or': T_BOOLEAN -> T_BOOLEAN -> T_BOOLEAN
1512  * '=~', '!~' : T_STRING  -> T_REGEXP  -> T_BOOLEAN
1513  *              T_NODESET -> T_REGEXP  -> T_BOOLEAN
1514  *
1515  * '|'        : T_NODESET -> T_NODESET -> T_NODESET
1516  *
1517  * Any type can be coerced to T_BOOLEAN (see coerce_to_bool)
1518  */
1519 static void check_binary(struct expr *expr, struct state *state) {
1520     check_expr(expr->left, state);
1521     check_expr(expr->right, state);
1522     RET_ON_ERROR;
1523
1524     enum type l = expr->left->type;
1525     enum type r = expr->right->type;
1526     int  ok = 1;
1527     enum type res;
1528
1529     switch(expr->op) {
1530     case OP_EQ:
1531     case OP_NEQ:
1532         ok = ((l == T_NODESET || l == T_STRING)
1533               && (r == T_NODESET || r == T_STRING))
1534             || (l == T_NUMBER && r == T_NUMBER);;
1535         res = T_BOOLEAN;
1536         break;
1537     case OP_LT:
1538     case OP_LE:
1539     case OP_GT:
1540     case OP_GE:
1541         ok = (l == T_NUMBER && r == T_NUMBER)
1542             || (l == T_STRING && r == T_STRING);
1543         res = T_BOOLEAN;
1544         break;
1545     case OP_PLUS:
1546         ok = (l == r && (l == T_NUMBER || l == T_STRING || l == T_REGEXP));
1547         res = l;
1548         break;
1549     case OP_MINUS:
1550     case OP_STAR:
1551         ok =  (l == T_NUMBER && r == T_NUMBER);
1552         res = T_NUMBER;
1553         break;
1554     case OP_UNION:
1555         ok = (l == T_NODESET && r == T_NODESET);
1556         res = T_NODESET;
1557         break;
1558     case OP_AND:
1559     case OP_OR:
1560         ok = 1;
1561         res = T_BOOLEAN;
1562         break;
1563     case OP_RE_MATCH:
1564     case OP_RE_NOMATCH:
1565         ok = ((l == T_STRING || l == T_NODESET) && r == T_REGEXP);
1566         res = T_BOOLEAN;
1567         break;
1568     default:
1569         assert(0);
1570     }
1571     if (! ok) {
1572         STATE_ERROR(state, PATHX_ETYPE);
1573     } else {
1574         expr->type = res;
1575     }
1576 }
1577
1578 static void check_var(struct expr *expr, struct state *state) {
1579     struct value *v = lookup_var(expr->ident, state);
1580     if (v == NULL) {
1581         STATE_ERROR(state, PATHX_ENOVAR);
1582         return;
1583     }
1584     expr->type = v->tag;
1585 }
1586
1587 /* Typecheck an expression */
1588 static void check_expr(struct expr *expr, struct state *state) {
1589     RET_ON_ERROR;
1590     switch(expr->tag) {
1591     case E_FILTER:
1592         check_filter(expr, state);
1593         break;
1594     case E_BINARY:
1595         check_binary(expr, state);
1596         break;
1597     case E_VALUE:
1598         expr->type = expr_value(expr, state)->tag;
1599         break;
1600     case E_VAR:
1601         check_var(expr, state);
1602         break;
1603     case E_APP:
1604         check_app(expr, state);
1605         break;
1606     default:
1607         assert(0);
1608     }
1609 }
1610
1611 /*
1612  * Utility functions for the parser
1613  */
1614
1615 static void skipws(struct state *state) {
1616     while (isspace(*state->pos)) state->pos += 1;
1617 }
1618
1619 static int match(struct state *state, char m) {
1620     skipws(state);
1621
1622     if (*state->pos == '\0')
1623         return 0;
1624     if (*state->pos == m) {
1625         state->pos += 1;
1626         return 1;
1627     }
1628     return 0;
1629 }
1630
1631 static int peek(struct state *state, const char *chars) {
1632     return strchr(chars, *state->pos) != NULL;
1633 }
1634
1635 /* Return 1 if STATE->POS starts with TOKEN, followed by optional
1636  * whitespace, followed by FOLLOW. In that case, STATE->POS is set to the
1637  * first character after FOLLOW. Return 0 otherwise and leave STATE->POS
1638  * unchanged.
1639  */
1640 static int looking_at(struct state *state, const char *token,
1641                       const char *follow) {
1642     if (STREQLEN(state->pos, token, strlen(token))) {
1643         const char *p = state->pos + strlen(token);
1644         while (isspace(*p)) p++;
1645         if (STREQLEN(p, follow, strlen(follow))) {
1646             state->pos = p + strlen(follow);
1647             return 1;
1648         }
1649     }
1650     return 0;
1651 }
1652
1653 /*************************************************************************
1654  * The parser
1655  *************************************************************************/
1656
1657 static void parse_expr(struct state *state);
1658
1659 static struct expr* pop_expr(struct state *state) {
1660     if (state->exprs_used > 0) {
1661         state->exprs_used -= 1;
1662         return state->exprs[state->exprs_used];
1663     } else {
1664         STATE_ERROR(state, PATHX_EINTERNAL);
1665         assert(0);
1666         return NULL;
1667     }
1668 }
1669
1670 static void push_expr(struct expr *expr, struct state *state) {
1671     if (state->exprs_used >= state->exprs_size) {
1672         size_t new_size = 2*state->exprs_size;
1673         if (new_size == 0) new_size = 8;
1674         if (REALLOC_N(state->exprs, new_size) < 0) {
1675             STATE_ENOMEM;
1676             return;
1677         }
1678         state->exprs_size = new_size;
1679     }
1680     state->exprs[state->exprs_used++] = expr;
1681 }
1682
1683 static void push_new_binary_op(enum binary_op op, struct state *state) {
1684     struct expr *expr = NULL;
1685     if (ALLOC(expr) < 0) {
1686         STATE_ENOMEM;
1687         return;
1688     }
1689
1690     expr->tag = E_BINARY;
1691     expr->op  = op;
1692     expr->right = pop_expr(state);
1693     expr->left = pop_expr(state);
1694     push_expr(expr, state);
1695 }
1696
1697 int pathx_escape_name(const char *in, char **out) {
1698     const char *p;
1699     int num_to_escape = 0;
1700     char *s;
1701
1702     *out = NULL;
1703
1704     for (p = in; *p; p++) {
1705         if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1706             num_to_escape += 1;
1707     }
1708
1709     if (num_to_escape == 0)
1710         return 0;
1711
1712     if (ALLOC_N(*out, strlen(in) + num_to_escape + 1) < 0)
1713         return -1;
1714
1715     for (p = in, s = *out; *p; p++) {
1716         if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1717             *s++ = '\\';
1718         *s++ = *p;
1719     }
1720     *s = '\0';
1721     return 0;
1722 }
1723
1724 /*
1725  * NameNoWS ::= [^][|/\= \t\n] | \\.
1726  * NameWS   ::= [^][|/\=] | \\.
1727  * Name ::= NameNoWS NameWS* NameNoWS | NameNoWS
1728  */
1729 static char *parse_name(struct state *state) {
1730     const char *s = state->pos;
1731     char *result;
1732
1733     while (*state->pos != '\0' && strchr(name_follow, *state->pos) == NULL) {
1734         /* This is a hack: since we allow spaces in names, we need to avoid
1735          * gobbling up stuff that is in follow(Name), e.g. 'or' so that
1736          * things like [name1 or name2] still work.
1737          */
1738         if (STREQLEN(state->pos, " or ", strlen(" or ")) ||
1739             STREQLEN(state->pos, " and ", strlen(" and ")))
1740             break;
1741
1742         if (*state->pos == '\\') {
1743             state->pos += 1;
1744             if (*state->pos == '\0') {
1745                 STATE_ERROR(state, PATHX_ENAME);
1746                 return NULL;
1747             }
1748         }
1749         state->pos += 1;
1750     }
1751
1752     /* Strip trailing white space */
1753     if (state->pos > s) {
1754         state->pos -= 1;
1755         while (isspace(*state->pos) && state->pos >= s)
1756             state->pos -= 1;
1757         state->pos += 1;
1758     }
1759
1760     if (state->pos == s) {
1761         STATE_ERROR(state, PATHX_ENAME);
1762         return NULL;
1763     }
1764
1765     result = strndup(s, state->pos - s);
1766     if (result == NULL) {
1767         STATE_ENOMEM;
1768         return NULL;
1769     }
1770
1771     char *p = result;
1772     for (char *t = result; *t != '\0'; t++, p++) {
1773         if (*t == '\\')
1774             t += 1;
1775         *p = *t;
1776     }
1777     *p = '\0';
1778
1779     return result;
1780 }
1781
1782 /*
1783  * Predicate    ::= "[" Expr "]" *
1784  */
1785 static struct pred *parse_predicates(struct state *state) {
1786     struct pred *pred = NULL;
1787     int nexpr = 0;
1788
1789     while (match(state, L_BRACK)) {
1790         parse_expr(state);
1791         nexpr += 1;
1792         RET0_ON_ERROR;
1793
1794         if (! match(state, R_BRACK)) {
1795             STATE_ERROR(state, PATHX_EPRED);
1796             return NULL;
1797         }
1798         skipws(state);
1799     }
1800
1801     if (nexpr == 0)
1802         return NULL;
1803
1804     if (ALLOC(pred) < 0) {
1805         STATE_ENOMEM;
1806         return NULL;
1807     }
1808     pred->nexpr = nexpr;
1809
1810     if (ALLOC_N(pred->exprs, nexpr) < 0) {
1811         free_pred(pred);
1812         STATE_ENOMEM;
1813         return NULL;
1814     }
1815
1816     for (int i = nexpr - 1; i >= 0; i--)
1817         pred->exprs[i] = pop_expr(state);
1818
1819     return pred;
1820 }
1821
1822 /*
1823  * Step ::= AxisSpecifier NameTest Predicate* | '.' | '..'
1824  * AxisSpecifier ::= AxisName '::' | <epsilon>
1825  * AxisName ::= 'ancestor'
1826  *            | 'ancestor-or-self'
1827  *            | 'child'
1828  *            | 'descendant'
1829  *            | 'descendant-or-self'
1830  *            | 'parent'
1831  *            | 'self'
1832  *            | 'root'
1833  */
1834 static struct step *parse_step(struct state *state) {
1835     struct step *step;
1836     int explicit_axis = 0, allow_predicates = 1;
1837
1838     if (ALLOC(step) < 0) {
1839         STATE_ENOMEM;
1840         return NULL;
1841     }
1842
1843     step->axis = CHILD;
1844     for (int i = 0; i < ARRAY_CARDINALITY(axis_names); i++) {
1845         if (looking_at(state, axis_names[i], "::")) {
1846             step->axis = i;
1847             explicit_axis = 1;
1848             break;
1849         }
1850     }
1851
1852     if (! match(state, '*')) {
1853         step->name = parse_name(state);
1854         if (HAS_ERROR(state))
1855             goto error;
1856         if (! explicit_axis) {
1857             if (STREQ(step->name, ".") || STREQ(step->name, "..")) {
1858                 step->axis = STREQ(step->name, ".") ? SELF : PARENT;
1859                 FREE(step->name);
1860                 allow_predicates = 0;
1861             }
1862         }
1863     }
1864
1865     if (allow_predicates) {
1866         step->predicates = parse_predicates(state);
1867         if (HAS_ERROR(state))
1868             goto error;
1869     }
1870
1871     return step;
1872
1873  error:
1874     free_step(step);
1875     return NULL;
1876 }
1877
1878 static struct step *make_step(enum axis axis, struct state *state) {
1879     struct step *result = NULL;
1880
1881     if (ALLOC(result) < 0) {
1882         STATE_ENOMEM;
1883         return NULL;
1884     }
1885     result->axis = axis;
1886     return result;
1887 }
1888
1889 /*
1890  * RelativeLocationPath ::= Step
1891  *                        | RelativeLocationPath '/' Step
1892  *                        | AbbreviatedRelativeLocationPath
1893  * AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step
1894  *
1895  * The above is the same as
1896  * RelativeLocationPath ::= Step ('/' Step | '//' Step)*
1897  */
1898 static struct locpath *
1899 parse_relative_location_path(struct state *state) {
1900     struct step *step = NULL;
1901     struct locpath *locpath = NULL;
1902
1903     step = parse_step(state);
1904     if (HAS_ERROR(state))
1905         goto error;
1906
1907     if (ALLOC(locpath) < 0) {
1908         STATE_ENOMEM;
1909         goto error;
1910     }
1911     list_append(locpath->steps, step);
1912     step = NULL;
1913
1914     while (match(state, '/')) {
1915         if (*state->pos == '/') {
1916             state->pos += 1;
1917             step = make_step(DESCENDANT_OR_SELF, state);
1918             ENOMEM_ON_NULL(state, step);
1919             list_append(locpath->steps, step);
1920         }
1921         step = parse_step(state);
1922         if (HAS_ERROR(state))
1923             goto error;
1924         list_append(locpath->steps, step);
1925         step = NULL;
1926     }
1927     return locpath;
1928
1929  error:
1930     free_step(step);
1931     free_locpath(locpath);
1932     return NULL;
1933 }
1934
1935 /*
1936  * LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
1937  * AbsoluteLocationPath ::= '/' RelativeLocationPath?
1938  *                        | AbbreviatedAbsoluteLocationPath
1939  * AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath
1940  *
1941  */
1942 static void parse_location_path(struct state *state) {
1943     struct expr *expr = NULL;
1944     struct locpath *locpath = NULL;
1945
1946     if (match(state, '/')) {
1947         if (*state->pos == '/') {
1948             state->pos += 1;
1949             locpath = parse_relative_location_path(state);
1950             if (HAS_ERROR(state))
1951                 return;
1952             struct step *step = make_step(DESCENDANT_OR_SELF, state);
1953             if (HAS_ERROR(state))
1954                 goto error;
1955             list_cons(locpath->steps, step);
1956         } else {
1957             if (*state->pos != '\0') {
1958                 locpath = parse_relative_location_path(state);
1959             } else {
1960                 if (ALLOC(locpath) < 0)
1961                     goto err_nomem;
1962             }
1963             struct step *step = make_step(ROOT, state);
1964             if (HAS_ERROR(state))
1965                 goto error;
1966             list_cons(locpath->steps, step);
1967         }
1968     } else {
1969         locpath = parse_relative_location_path(state);
1970     }
1971
1972     if (ALLOC(expr) < 0)
1973         goto err_nomem;
1974     expr->tag = E_FILTER;
1975     expr->locpath = locpath;
1976     push_expr(expr, state);
1977     return;
1978
1979  err_nomem:
1980     STATE_ENOMEM;
1981  error:
1982     free_expr(expr);
1983     free_locpath(locpath);
1984     return;
1985 }
1986
1987 /*
1988  * Number       ::= /[0-9]+/
1989  */
1990 static void parse_number(struct state *state) {
1991     struct expr *expr = NULL;
1992     unsigned long val;
1993     char *end;
1994
1995     errno = 0;
1996     val = strtoul(state->pos, &end, 10);
1997     if (errno || end == state->pos || (int) val != val) {
1998         STATE_ERROR(state, PATHX_ENUMBER);
1999         return;
2000     }
2001
2002     state->pos = end;
2003
2004     if (ALLOC(expr) < 0)
2005         goto err_nomem;
2006     expr->tag = E_VALUE;
2007     expr->value_ind = make_value(T_NUMBER, state);
2008     if (HAS_ERROR(state))
2009         goto error;
2010     expr_value(expr, state)->number = val;
2011
2012     push_expr(expr, state);
2013     return;
2014
2015  err_nomem:
2016     STATE_ENOMEM;
2017  error:
2018     free_expr(expr);
2019     return;
2020 }
2021
2022 /*
2023  * Literal ::= '"' /[^"]* / '"' | "'" /[^']* / "'"
2024  */
2025 static void parse_literal(struct state *state) {
2026     char delim;
2027     const char *s;
2028     struct expr *expr = NULL;
2029
2030     if (*state->pos == '"')
2031         delim = '"';
2032     else if (*state->pos == '\'')
2033         delim = '\'';
2034     else {
2035         STATE_ERROR(state, PATHX_ESTRING);
2036         return;
2037     }
2038     state->pos += 1;
2039
2040     s = state->pos;
2041     while (*state->pos != '\0' && *state->pos != delim) state->pos += 1;
2042
2043     if (*state->pos != delim) {
2044         STATE_ERROR(state, PATHX_EDELIM);
2045         return;
2046     }
2047     state->pos += 1;
2048
2049     if (ALLOC(expr) < 0)
2050         goto err_nomem;
2051     expr->tag = E_VALUE;
2052     expr->value_ind = make_value(T_STRING, state);
2053     if (HAS_ERROR(state))
2054         goto error;
2055     expr_value(expr, state)->string = strndup(s, state->pos - s - 1);
2056     if (expr_value(expr, state)->string == NULL)
2057         goto err_nomem;
2058
2059     push_expr(expr, state);
2060     return;
2061
2062  err_nomem:
2063     STATE_ENOMEM;
2064  error:
2065     free_expr(expr);
2066     return;
2067 }
2068
2069 /*
2070  * FunctionCall ::= Name '(' ( Expr ( ',' Expr )* )? ')'
2071  */
2072 static void parse_function_call(struct state *state) {
2073     const struct func *func = NULL;
2074     struct expr *expr = NULL;
2075     int nargs = 0, find = 0;
2076
2077     for (; find < ARRAY_CARDINALITY(builtin_funcs); find++) {
2078         if (looking_at(state, builtin_funcs[find].name, "(")) {
2079             func = builtin_funcs + find;
2080             break;
2081         }
2082     }
2083     if (func == NULL) {
2084         STATE_ERROR(state, PATHX_ENAME);
2085         return;
2086     }
2087
2088     if (! match(state, ')')) {
2089         do {
2090             nargs += 1;
2091             parse_expr(state);
2092             RET_ON_ERROR;
2093         } while (match(state, ','));
2094
2095         if (! match(state, ')')) {
2096             STATE_ERROR(state, PATHX_EPAREN);
2097             return;
2098         }
2099     }
2100
2101     int found = 0; /* Whether there is a builtin matching in name and arity */
2102     for (int i=find; i < ARRAY_CARDINALITY(builtin_funcs); i++) {
2103         if (STRNEQ(func->name, builtin_funcs[i].name))
2104             break;
2105         if (builtin_funcs[i].arity == nargs) {
2106             func = builtin_funcs + i;
2107             found = 1;
2108             break;
2109         }
2110     }
2111
2112     if (! found) {
2113         STATE_ERROR(state, PATHX_EARITY);
2114         return;
2115     }
2116
2117     if (ALLOC(expr) < 0) {
2118         STATE_ENOMEM;
2119         return;
2120     }
2121     expr->tag = E_APP;
2122     if (ALLOC_N(expr->args, nargs) < 0) {
2123         free_expr(expr);
2124         STATE_ENOMEM;
2125         return;
2126     }
2127     expr->func = func;
2128     for (int i = nargs - 1; i >= 0; i--)
2129         expr->args[i] = pop_expr(state);
2130
2131     push_expr(expr, state);
2132 }
2133
2134 /*
2135  * VariableReference ::= '$' /[a-zA-Z_][a-zA-Z0-9_]* /
2136  *
2137  * The '$' is consumed by parse_primary_expr
2138  */
2139 static void parse_var(struct state *state) {
2140     const char *id = state->pos;
2141     struct expr *expr = NULL;
2142
2143     if (!isalpha(*id) && *id != '_') {
2144         STATE_ERROR(state, PATHX_ENAME);
2145         return;
2146     }
2147     id++;
2148     while (isalpha(*id) || isdigit(*id) || *id == '_')
2149         id += 1;
2150
2151     if (ALLOC(expr) < 0)
2152         goto err_nomem;
2153     expr->tag = E_VAR;
2154     expr->ident = strndup(state->pos, id - state->pos);
2155     if (expr->ident == NULL)
2156         goto err_nomem;
2157
2158     push_expr(expr, state);
2159     state->pos = id;
2160     return;
2161  err_nomem:
2162     STATE_ENOMEM;
2163     free_expr(expr);
2164     return;
2165 }
2166
2167 /*
2168  * PrimaryExpr ::= Literal
2169  *               | Number
2170  *               | FunctionCall
2171  *               | VariableReference
2172  *               | '(' Expr ')'
2173  *
2174  */
2175 static void parse_primary_expr(struct state *state) {
2176     if (peek(state, "'\"")) {
2177         parse_literal(state);
2178     } else if (peek(state, "0123456789")) {
2179         parse_number(state);
2180     } else if (match(state, '(')) {
2181         parse_expr(state);
2182         RET_ON_ERROR;
2183         if (! match(state, ')')) {
2184             STATE_ERROR(state, PATHX_EPAREN);
2185             return;
2186         }
2187     } else if (match(state, '$')) {
2188         parse_var(state);
2189     } else {
2190         parse_function_call(state);
2191     }
2192 }
2193
2194 static int looking_at_primary_expr(struct state *state) {
2195     const char *s = state->pos;
2196     /* Is it a Number, Literal or VariableReference ? */
2197     if (peek(state, "$'\"0123456789"))
2198         return 1;
2199
2200     /* Or maybe a function call, i.e. a word followed by a '(' ?
2201      * Note that our function names are only [a-zA-Z]+
2202      */
2203     while (*s != '\0' && isalpha(*s)) s++;
2204     while (*s != '\0' && isspace(*s)) s++;
2205     return *s == '(';
2206 }
2207
2208 /*
2209  * PathExpr ::= LocationPath
2210  *            | FilterExpr
2211  *            | FilterExpr '/' RelativeLocationPath
2212  *            | FilterExpr '//' RelativeLocationPath
2213  *
2214  * FilterExpr ::= PrimaryExpr Predicate
2215  *
2216  * The grammar is ambiguous here: the expression '42' can either be the
2217  * number 42 (a PrimaryExpr) or the RelativeLocationPath 'child::42'. The
2218  * reason for this ambiguity is that we allow node names like '42' in the
2219  * tree; rather than forbid them, we resolve the ambiguity by always
2220  * parsing '42' as a number, and requiring that the user write the
2221  * RelativeLocationPath in a different form, e.g. 'child::42' or './42'.
2222  */
2223 static void parse_path_expr(struct state *state) {
2224     struct expr *expr = NULL;
2225     struct pred *predicates = NULL;
2226     struct locpath *locpath = NULL;
2227
2228     if (looking_at_primary_expr(state)) {
2229         parse_primary_expr(state);
2230         RET_ON_ERROR;
2231         predicates = parse_predicates(state);
2232         RET_ON_ERROR;
2233         if (match(state, '/')) {
2234             if (match(state, '/')) {
2235                 locpath = parse_relative_location_path(state);
2236                 if (HAS_ERROR(state))
2237                     goto error;
2238
2239                 struct step *step = make_step(DESCENDANT_OR_SELF, state);
2240                 if (HAS_ERROR(state))
2241                     return;
2242                 list_cons(locpath->steps, step);
2243             } else {
2244                 if (*state->pos == '\0') {
2245                     STATE_ERROR(state, PATHX_EEND);
2246                     goto error;
2247                 }
2248                 locpath = parse_relative_location_path(state);
2249             }
2250         }
2251         /* A PathExpr without predicates and locpath is
2252          * just a PrimaryExpr
2253          */
2254         if (predicates == NULL && locpath == NULL)
2255             return;
2256         /* To make evaluation easier, we parse something like
2257          *   $var[pred] as $var[pred]/.
2258          */
2259         if (locpath == NULL) {
2260             if (ALLOC(locpath) < 0)
2261                 goto error;
2262             if (ALLOC(locpath->steps) < 0)
2263                 goto error;
2264             locpath->steps->axis = SELF;
2265         }
2266         if (ALLOC(expr) < 0)
2267             goto error;
2268         expr->tag = E_FILTER;
2269         expr->predicates = predicates;
2270         expr->primary    = pop_expr(state);
2271         expr->locpath    = locpath;
2272         push_expr(expr, state);
2273     } else {
2274         parse_location_path(state);
2275     }
2276     return;
2277  error:
2278     free_expr(expr);
2279     free_pred(predicates);
2280     free_locpath(locpath);
2281     return;
2282 }
2283
2284 /*
2285  * UnionExpr ::= PathExpr ('|' PathExpr)*
2286  */
2287 static void parse_union_expr(struct state *state) {
2288     parse_path_expr(state);
2289     RET_ON_ERROR;
2290     while (match(state, '|')) {
2291         parse_path_expr(state);
2292         RET_ON_ERROR;
2293         push_new_binary_op(OP_UNION, state);
2294     }
2295 }
2296
2297 /*
2298  * MultiplicativeExpr ::= UnionExpr ('*' UnionExpr)*
2299  */
2300 static void parse_multiplicative_expr(struct state *state) {
2301     parse_union_expr(state);
2302     RET_ON_ERROR;
2303     while (match(state, '*')) {
2304         parse_union_expr(state);
2305         RET_ON_ERROR;
2306         push_new_binary_op(OP_STAR, state);
2307     }
2308 }
2309
2310 /*
2311  * AdditiveExpr ::= MultiplicativeExpr (AdditiveOp MultiplicativeExpr)*
2312  * AdditiveOp   ::= '+' | '-'
2313  */
2314 static void parse_additive_expr(struct state *state) {
2315     parse_multiplicative_expr(state);
2316     RET_ON_ERROR;
2317     while (*state->pos == '+' || *state->pos == '-') {
2318         enum binary_op op = (*state->pos == '+') ? OP_PLUS : OP_MINUS;
2319         state->pos += 1;
2320         skipws(state);
2321         parse_multiplicative_expr(state);
2322         RET_ON_ERROR;
2323         push_new_binary_op(op, state);
2324     }
2325 }
2326
2327 /*
2328  * RelationalExpr ::= AdditiveExpr (RelationalOp AdditiveExpr)?
2329  * RelationalOp ::= ">" | "<" | ">=" | "<="
2330  */
2331 static void parse_relational_expr(struct state *state) {
2332     parse_additive_expr(state);
2333     RET_ON_ERROR;
2334     if (*state->pos == '<' || *state->pos == '>') {
2335         enum binary_op op = (*state->pos == '<') ? OP_LT : OP_GT;
2336         state->pos += 1;
2337         if (*state->pos == '=') {
2338             op = (op == OP_LT) ? OP_LE : OP_GE;
2339             state->pos += 1;
2340         }
2341         skipws(state);
2342         parse_additive_expr(state);
2343         RET_ON_ERROR;
2344         push_new_binary_op(op, state);
2345     }
2346 }
2347
2348 /*
2349  * EqualityExpr ::= RelationalExpr (EqualityOp RelationalExpr)? | ReMatchExpr
2350  * EqualityOp ::= "=" | "!="
2351  * ReMatchExpr ::= RelationalExpr MatchOp RelationalExpr
2352  * MatchOp ::= "=~" | "!~"
2353  */
2354 static void parse_equality_expr(struct state *state) {
2355     parse_relational_expr(state);
2356     RET_ON_ERROR;
2357     if ((*state->pos == '=' || *state->pos == '!') && state->pos[1] == '~') {
2358         enum binary_op op = (*state->pos == '=') ? OP_RE_MATCH : OP_RE_NOMATCH;
2359         state->pos += 2;
2360         skipws(state);
2361         parse_relational_expr(state);
2362         RET_ON_ERROR;
2363         push_new_binary_op(op, state);
2364     } else if (*state->pos == '=' ||
2365         (*state->pos == '!' && state->pos[1] == '=')) {
2366         enum binary_op op = (*state->pos == '=') ? OP_EQ : OP_NEQ;
2367         state->pos += (op == OP_EQ) ? 1 : 2;
2368         skipws(state);
2369         parse_relational_expr(state);
2370         RET_ON_ERROR;
2371         push_new_binary_op(op, state);
2372     }
2373 }
2374
2375 /*
2376  * AndExpr ::= EqualityExpr ('and' EqualityExpr)*
2377  */
2378 static void parse_and_expr(struct state *state) {
2379     parse_equality_expr(state);
2380     RET_ON_ERROR;
2381     while (*state->pos == 'a' && state->pos[1] == 'n'
2382         && state->pos[2] == 'd') {
2383         state->pos += 3;
2384         skipws(state);
2385         parse_equality_expr(state);
2386         RET_ON_ERROR;
2387         push_new_binary_op(OP_AND, state);
2388     }
2389 }
2390
2391 /*
2392  * OrExpr ::= AndExpr ('or' AndExpr)*
2393  */
2394 static void parse_or_expr(struct state *state) {
2395     parse_and_expr(state);
2396     RET_ON_ERROR;
2397     while (*state->pos == 'o' && state->pos[1] == 'r') {
2398         state->pos += 2;
2399         skipws(state);
2400         parse_and_expr(state);
2401         RET_ON_ERROR;
2402         push_new_binary_op(OP_OR, state);
2403     }
2404 }
2405
2406 /*
2407  * Expr ::= OrExpr
2408  */
2409 static void parse_expr(struct state *state) {
2410     skipws(state);
2411     parse_or_expr(state);
2412 }
2413
2414 static void store_error(struct pathx *pathx) {
2415     const char *pathx_msg = NULL;
2416     const char *path = pathx->state->txt;
2417     const pathx_errcode_t errcode = pathx->state->errcode;
2418     struct error *err = pathx->state->error;
2419
2420     char *pos_str = pathx->state->errmsg;
2421     pathx->state->errmsg = NULL;
2422
2423     if (err == NULL || errcode == PATHX_NOERROR || err->code != AUG_NOERROR)
2424         return;
2425
2426     switch (errcode) {
2427     case PATHX_ENOMEM:
2428         err->code = AUG_ENOMEM;
2429         break;
2430     case PATHX_EMMATCH:
2431         err->code = AUG_EMMATCH;
2432         break;
2433     case PATHX_ENOMATCH:
2434         err->code = AUG_ENOMATCH;
2435         break;
2436     default:
2437         err->code = AUG_EPATHX;
2438         break;
2439     }
2440
2441     /* We only need details for pathx syntax errors */
2442     if (err->code != AUG_EPATHX)
2443         return;
2444
2445     int pos;
2446     pathx_msg = pathx_error(pathx, NULL, &pos);
2447
2448     bool has_msg = pos_str != NULL;
2449     int pos_str_len = pos_str == NULL ? 0 : strlen(pos_str);
2450     if (REALLOC_N(pos_str, pos_str_len + strlen(path) + 8) >= 0) {
2451         if (has_msg) {
2452             strcat(pos_str, " in ");
2453             strncat(pos_str, path, pos);
2454         } else {
2455             /* initialize pos_str explicitly, path might be "" */
2456             pos_str[0] = '\0';
2457             strncat(pos_str, path, pos);
2458         }
2459         strcat(pos_str, "|=|");
2460         strcat(pos_str, path + pos);
2461     }
2462
2463     err->minor = errcode;
2464     err->details = pos_str;
2465     pos_str = NULL;
2466     err->minor_details = pathx_msg;
2467 }
2468
2469 int pathx_parse(const struct tree *tree,
2470                 struct error *err,
2471                 const char *txt,
2472                 bool need_nodeset,
2473                 struct pathx_symtab *symtab,
2474                 struct tree *root_ctx,
2475                 struct pathx **pathx) {
2476     struct state *state = NULL;
2477
2478     *pathx = NULL;
2479
2480     if (ALLOC(*pathx) < 0)
2481         goto oom;
2482
2483     (*pathx)->origin = (struct tree *) tree;
2484
2485     /* Set up state */
2486     if (ALLOC((*pathx)->state) < 0)
2487         goto oom;
2488     state = (*pathx)->state;
2489
2490     state->errcode = PATHX_NOERROR;
2491     state->errmsg = NULL;
2492     state->txt = txt;
2493     state->pos = txt;
2494     state->symtab = symtab;
2495     state->root_ctx = root_ctx;
2496     state->error = err;
2497
2498     if (ALLOC_N(state->value_pool, 8) < 0) {
2499         STATE_ENOMEM;
2500         goto done;
2501     }
2502     state->value_pool_size = 8;
2503     state->value_pool[0].tag = T_BOOLEAN;
2504     state->value_pool[0].boolval = 0;
2505     state->value_pool[1].tag = T_BOOLEAN;
2506     state->value_pool[1].boolval = 1;
2507     state->value_pool_used = 2;
2508
2509     /* Parse */
2510     parse_expr(state);
2511     if (HAS_ERROR(state))
2512         goto done;
2513     if (state->pos != state->txt + strlen(state->txt)) {
2514         STATE_ERROR(state, PATHX_EEND);
2515         goto done;
2516     }
2517
2518     if (state->exprs_used != 1) {
2519         STATE_ERROR(state, PATHX_EINTERNAL);
2520         goto done;
2521     }
2522
2523     /* Typecheck */
2524     check_expr(state->exprs[0], state);
2525     if (HAS_ERROR(state))
2526         goto done;
2527
2528     if (need_nodeset && state->exprs[0]->type != T_NODESET) {
2529         STATE_ERROR(state, PATHX_ETYPE);
2530         goto done;
2531     }
2532
2533  done:
2534     store_error(*pathx);
2535     return state->errcode;
2536  oom:
2537     free_pathx(*pathx);
2538     *pathx = NULL;
2539     if (err != NULL)
2540         err->code = AUG_ENOMEM;
2541     return PATHX_ENOMEM;
2542 }
2543
2544 /*************************************************************************
2545  * Searching in the tree
2546  *************************************************************************/
2547
2548 static bool step_matches(struct step *step, struct tree *tree) {
2549     if (step->name == NULL) {
2550         return step->axis == ROOT || tree->label != NULL;
2551     } else {
2552         return streqx(step->name, tree->label);
2553     }
2554 }
2555
2556 static struct tree *tree_prev(struct tree *pos) {
2557     struct tree *node = NULL;
2558     if (pos != pos->parent->children) {
2559         for (node = pos->parent->children;
2560              node->next != pos;
2561              node = node->next);
2562     }
2563     return node;
2564 }
2565
2566 /* When the first step doesn't begin with ROOT then use relative root context
2567  * instead. */
2568 static struct tree *step_root(struct step *step, struct tree *ctx,
2569                               struct tree *root_ctx) {
2570     struct tree *node = NULL;
2571     switch (step->axis) {
2572     case SELF:
2573     case CHILD:
2574     case DESCENDANT:
2575     case PARENT:
2576     case ANCESTOR:
2577     case PRECEDING_SIBLING:
2578     case FOLLOWING_SIBLING:
2579         /* only use root_ctx when ctx is the absolute tree root */
2580         if (ctx == ctx->parent && root_ctx != NULL)
2581             node = root_ctx;
2582         else
2583             node = ctx;
2584         break;
2585     case ROOT:
2586     case DESCENDANT_OR_SELF:
2587         node = ctx;
2588         break;
2589     default:
2590         assert(0);
2591     }
2592     if (node == NULL)
2593         return NULL;
2594     return node;
2595 }
2596
2597 static struct tree *step_first(struct step *step, struct tree *ctx) {
2598     struct tree *node = NULL;
2599     switch (step->axis) {
2600     case SELF:
2601     case DESCENDANT_OR_SELF:
2602         node = ctx;
2603         break;
2604     case CHILD:
2605     case DESCENDANT:
2606         node = ctx->children;
2607         break;
2608     case PARENT:
2609     case ANCESTOR:
2610         node = ctx->parent;
2611         break;
2612     case ROOT:
2613         while (ctx->parent != ctx)
2614             ctx = ctx->parent;
2615         node = ctx;
2616         break;
2617     case PRECEDING_SIBLING:
2618         node = tree_prev(ctx);
2619         break;
2620     case FOLLOWING_SIBLING:
2621         node = ctx->next;
2622         break;
2623     default:
2624         assert(0);
2625     }
2626     if (node == NULL)
2627         return NULL;
2628     if (step_matches(step, node))
2629         return node;
2630     return step_next(step, ctx, node);
2631 }
2632
2633 static struct tree *step_next(struct step *step, struct tree *ctx,
2634                               struct tree *node) {
2635     while (node != NULL) {
2636         switch (step->axis) {
2637         case SELF:
2638             node = NULL;
2639             break;
2640         case CHILD:
2641             node = node->next;
2642             break;
2643         case DESCENDANT:
2644         case DESCENDANT_OR_SELF:
2645             if (node->children != NULL) {
2646                 node = node->children;
2647             } else {
2648                 while (node->next == NULL && node != ctx)
2649                     node = node->parent;
2650                 if (node == ctx)
2651                     node = NULL;
2652                 else
2653                     node = node->next;
2654             }
2655             break;
2656         case PARENT:
2657         case ROOT:
2658             node = NULL;
2659             break;
2660         case ANCESTOR:
2661             if (node->parent == node)
2662                 node = NULL;
2663             else
2664                 node = node->parent;
2665             break;
2666         case PRECEDING_SIBLING:
2667             node = tree_prev(node);
2668             break;
2669         case FOLLOWING_SIBLING:
2670             node = node->next;
2671             break;
2672         default:
2673             assert(0);
2674         }
2675         if (node != NULL && step_matches(step, node))
2676             break;
2677     }
2678     return node;
2679 }
2680
2681 static struct value *pathx_eval(struct pathx *pathx) {
2682     struct state *state = pathx->state;
2683     state->ctx = pathx->origin;
2684     state->ctx_pos = 1;
2685     state->ctx_len = 1;
2686     eval_expr(state->exprs[0], state);
2687     if (HAS_ERROR(state))
2688         return NULL;
2689
2690     if (state->values_used != 1) {
2691         STATE_ERROR(state, PATHX_EINTERNAL);
2692         return NULL;
2693     }
2694     return pop_value(state);
2695 }
2696
2697 struct tree *pathx_next(struct pathx *pathx) {
2698     if (pathx->node + 1 < pathx->nodeset->used)
2699         return pathx->nodeset->nodes[++pathx->node];
2700     return NULL;
2701 }
2702
2703 /* Find the first node in TREE matching PATH. */
2704 struct tree *pathx_first(struct pathx *pathx) {
2705     if (pathx->nodeset == NULL) {
2706         struct value *v = pathx_eval(pathx);
2707
2708         if (HAS_ERROR(pathx->state))
2709             goto error;
2710         assert(v->tag == T_NODESET);
2711         pathx->nodeset = v->nodeset;
2712     }
2713     pathx->node = 0;
2714     if (pathx->nodeset->used == 0)
2715         return NULL;
2716     else
2717         return pathx->nodeset->nodes[0];
2718  error:
2719     store_error(pathx);
2720     return NULL;
2721 }
2722
2723 /* Find a node in the tree that matches the longest prefix of PATH.
2724  *
2725  * Return 1 if a node was found that exactly matches PATH, 0 if an incomplete
2726  * prefix matches, and -1 if more than one node in the tree match.
2727  *
2728  * TMATCH is set to the tree node that matches, and SMATCH to the next step
2729  * after the one where TMATCH matched. If no node matches or multiple nodes
2730  * at the same depth match, TMATCH and SMATCH will be NULL. When exactly
2731  * one node matches, TMATCH will be that node, and SMATCH will be NULL.
2732  */
2733 static int locpath_search(struct locpath_trace *lpt,
2734                           struct tree **tmatch, struct step **smatch) {
2735     int last;
2736     int result = -1;
2737
2738     for (last=lpt->maxns; last >= 0 && lpt->ns[last]->used == 0; last--);
2739     if (last < 0) {
2740         *smatch = lpt->lp->steps;
2741         result = 1;
2742         goto done;
2743     }
2744     if (lpt->ns[last]->used > 1) {
2745         result = -1;
2746         goto done;
2747     }
2748     result = 0;
2749     *tmatch = lpt->ns[last]->nodes[0];
2750     *smatch = lpt->lp->steps;
2751     for (int i=0; i < last; i++)
2752         *smatch = (*smatch)->next;
2753  done:
2754     for (int i=0; i < lpt->maxns; i++)
2755         free_nodeset(lpt->ns[i]);
2756     FREE(lpt->ns);
2757     return result;
2758 }
2759
2760 /* Expand the tree ROOT so that it contains all components of PATH. PATH
2761  * must have been initialized against ROOT by a call to PATH_FIND_ONE.
2762  *
2763  * Return the first segment that was created by this operation, or NULL on
2764  * error.
2765  */
2766 int pathx_expand_tree(struct pathx *path, struct tree **tree) {
2767     int r;
2768     struct step *step = NULL;
2769     struct locpath_trace lpt;
2770     struct tree *first_child = NULL;
2771     struct value *v = NULL;
2772
2773     MEMZERO(&lpt, 1);
2774     path->state->locpath_trace = &lpt;
2775     v = pathx_eval(path);
2776     path->state->locpath_trace = NULL;
2777     if (HAS_ERROR(path->state))
2778         goto error;
2779
2780     if (lpt.maxns == 0) {
2781         if (v->tag != T_NODESET || v->nodeset->used == 0) {
2782             STATE_ERROR(path->state, PATHX_ENOMATCH);
2783             goto error;
2784         }
2785         if (v->nodeset->used > 1)
2786             goto error;
2787         *tree = v->nodeset->nodes[0];
2788         return 0;
2789     }
2790
2791     *tree = path->origin;
2792     r = locpath_search(&lpt, tree, &step);
2793     if (r == -1) {
2794         STATE_ERROR(path->state, PATHX_EMMATCH);
2795         goto error;
2796     }
2797
2798     if (step == NULL)
2799         return 0;
2800
2801     struct tree *parent = *tree;
2802     if (parent == NULL)
2803         parent = path->origin;
2804
2805     list_for_each(s, step) {
2806         if (s->name == NULL || s->axis != CHILD)
2807             goto error;
2808         struct tree *t = make_tree(strdup(s->name), NULL, parent, NULL);
2809         if (first_child == NULL)
2810             first_child = t;
2811         if (t == NULL || t->label == NULL)
2812             goto error;
2813         list_append(parent->children, t);
2814         parent = t;
2815     }
2816
2817     while (first_child->children != NULL)
2818         first_child = first_child->children;
2819
2820     *tree = first_child;
2821     return 1;
2822
2823  error:
2824     if (first_child != NULL) {
2825         list_remove(first_child, first_child->parent->children);
2826         free_tree(first_child);
2827     }
2828     *tree = NULL;
2829     store_error(path);
2830     return -1;
2831 }
2832
2833 int pathx_find_one(struct pathx *path, struct tree **tree) {
2834     *tree = pathx_first(path);
2835     if (HAS_ERROR(path->state))
2836         return -1;
2837     return path->nodeset->used;
2838 }
2839
2840 struct error *err_of_pathx(struct pathx *px) {
2841     return px->state->error;
2842 }
2843
2844 const char *pathx_error(struct pathx *path, const char **txt, int *pos) {
2845     int errcode = PATHX_ENOMEM;
2846
2847     if (path != NULL) {
2848         if (path->state->errcode < ARRAY_CARDINALITY(errcodes))
2849             errcode = path->state->errcode;
2850         else
2851             errcode = PATHX_EINTERNAL;
2852
2853         if (txt)
2854             *txt = path->state->txt;
2855
2856         if (pos)
2857             *pos = path->state->pos - path->state->txt;
2858     }
2859     return errcodes[errcode];
2860 }
2861
2862 /*
2863  * Symbol tables
2864  */
2865 static struct pathx_symtab
2866 *make_symtab(struct pathx_symtab *symtab, const char *name,
2867              struct value *value)
2868 {
2869     struct pathx_symtab *new;
2870     char *n = NULL;
2871
2872     n = strdup(name);
2873     if (n == NULL)
2874         return NULL;
2875
2876     if (ALLOC(new) < 0) {
2877         free(n);
2878         return NULL;
2879     }
2880     new->name = n;
2881     new->value = value;
2882     if (symtab == NULL) {
2883         return new;
2884     } else {
2885         new->next = symtab->next;
2886         symtab->next = new;
2887     }
2888     return symtab;
2889 }
2890
2891 void free_symtab(struct pathx_symtab *symtab) {
2892
2893     while (symtab != NULL) {
2894         struct pathx_symtab *del = symtab;
2895         symtab = del->next;
2896         free(del->name);
2897         release_value(del->value);
2898         free(del->value);
2899         free(del);
2900     }
2901 }
2902
2903 struct pathx_symtab *pathx_get_symtab(struct pathx *pathx) {
2904     return pathx->state->symtab;
2905 }
2906
2907 static int pathx_symtab_set(struct pathx_symtab **symtab,
2908                             const char *name, struct value *v) {
2909     int found = 0;
2910
2911     list_for_each(tab, *symtab) {
2912         if (STREQ(tab->name, name)) {
2913             release_value(tab->value);
2914             free(tab->value);
2915             tab->value = v;
2916             found = 1;
2917             break;
2918         }
2919     }
2920
2921     if (!found) {
2922         struct pathx_symtab *new = NULL;
2923
2924         new = make_symtab(*symtab, name, v);
2925         if (new == NULL)
2926             goto error;
2927         *symtab = new;
2928     }
2929     return 0;
2930  error:
2931     return -1;
2932 }
2933
2934 int pathx_symtab_define(struct pathx_symtab **symtab,
2935                         const char *name, struct pathx *px) {
2936     int r;
2937     struct value *value = NULL, *v = NULL;
2938     struct state *state = px->state;
2939
2940     value = pathx_eval(px);
2941     if (HAS_ERROR(px->state))
2942         goto error;
2943
2944     if (ALLOC(v) < 0) {
2945         STATE_ENOMEM;
2946         goto error;
2947     }
2948
2949     *v = *value;
2950     value->tag = T_BOOLEAN;
2951
2952     r = pathx_symtab_set(symtab, name, v);
2953     if (r < 0) {
2954         STATE_ENOMEM;
2955         goto error;
2956     }
2957
2958     if (v->tag == T_NODESET)
2959         return v->nodeset->used;
2960     else
2961         return 0;
2962  error:
2963     release_value(value);
2964     free(value);
2965     release_value(v);
2966     free(v);
2967     store_error(px);
2968     return -1;
2969 }
2970
2971 int pathx_symtab_undefine(struct pathx_symtab **symtab, const char *name) {
2972     struct pathx_symtab *del = NULL;
2973
2974     for(del = *symtab;
2975         del != NULL && !STREQ(del->name, name);
2976         del = del->next);
2977     if (del == NULL)
2978         return 0;
2979     list_remove(del, *symtab);
2980     free_symtab(del);
2981     return 0;
2982 }
2983
2984 int pathx_symtab_assign_tree(struct pathx_symtab **symtab,
2985                              const char *name, struct tree *tree) {
2986     struct value *v = NULL;
2987     int r;
2988
2989     if (ALLOC(v) < 0)
2990         goto error;
2991
2992     v->tag = T_NODESET;
2993     if (ALLOC(v->nodeset) < 0)
2994         goto error;
2995     if (ALLOC_N(v->nodeset->nodes, 1) < 0)
2996         goto error;
2997     v->nodeset->used = 1;
2998     v->nodeset->size = 1;
2999     v->nodeset->nodes[0] = tree;
3000
3001     r = pathx_symtab_set(symtab, name, v);
3002     if (r < 0)
3003         goto error;
3004     return 1;
3005  error:
3006     release_value(v);
3007     free(v);
3008     return -1;
3009 }
3010
3011 void pathx_symtab_remove_descendants(struct pathx_symtab *symtab,
3012                                      const struct tree *tree) {
3013     list_for_each(tab, symtab) {
3014         if (tab->value->tag != T_NODESET)
3015             continue;
3016         struct nodeset *ns = tab->value->nodeset;
3017         for (int i=0; i < ns->used;) {
3018             struct tree *t = ns->nodes[i];
3019             while (t != t->parent && t != tree)
3020                 t = t->parent;
3021             if (t == tree)
3022                 ns_remove(ns, i, 1);
3023             else
3024                 i += 1;
3025         }
3026     }
3027 }
3028
3029 /*
3030  * Local variables:
3031  *  indent-tabs-mode: nil
3032  *  c-indent-level: 4
3033  *  c-basic-offset: 4
3034  *  tab-width: 4
3035  * End:
3036  */