2 * parser.c: parse a configuration file according to a grammar
4 * Copyright (C) 2007-2015 David Lutterkort
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * Author: David Lutterkort <dlutter@redhat.com>
36 /* Our favorite error message */
37 static const char *const short_iteration =
38 "Iterated lens matched less than it should";
52 char *value; /* GET_STORE leaves a value here */
53 struct lns_error *error;
54 /* We use the registers from a regular expression match to keep track
55 * of the substring we are currently looking at. REGS are the registers
56 * from the last regexp match; NREG is the number of the register
57 * in REGS that describes the substring we are currently looking at.
59 * We adjust NREG as we traverse lenses either because we know the
60 * grouping structure of lenses (for L_STAR, the child lens is always
61 * in NREG + 1) or by looking at the number of groups in a sublens (as
62 * we move from one child of L_CONCAT to the next, we need to add 1 +
63 * number of groups of that child to NREG) How NREG is adjusted is
64 * closely related to how the REGEXP_* routines build up bigger regexps
67 struct re_registers *regs;
71 /* Used by recursive lenses to stack intermediate results */
81 struct { /* M_PARSE */
88 /* Used by recursive lenses in get_rec and parse_rec */
89 enum mode_t { M_GET, M_PARSE };
91 /* Abstract Syntax Tree for recursive parse */
94 struct ast **children;
107 struct frame *frames;
109 uint lvl; /* Debug only */
113 #define REG_START(state) ((state)->regs->start[(state)->nreg])
114 #define REG_END(state) ((state)->regs->end[(state)->nreg])
115 #define REG_SIZE(state) (REG_END(state) - REG_START(state))
116 #define REG_POS(state) ((state)->text + REG_START(state))
117 #define REG_VALID(state) ((state)->regs != NULL && \
118 (state)->nreg < (state)->regs->num_regs)
119 #define REG_MATCHED(state) (REG_VALID(state) \
120 && (state)->regs->start[(state)->nreg] >= 0)
125 static struct ast *make_ast(struct lens *lens) {
126 struct ast *ast = NULL;
132 if (ALLOC_N(ast->children, ast->capacity) < 0) {
139 /* recursively free the node and all it's descendants */
140 static void free_ast(struct ast *ast) {
144 for (i = 0; i < ast->nchildren; i++) {
145 free_ast(ast->children[i]);
147 if (ast->children != NULL)
152 static struct ast *ast_root(struct ast *ast) {
153 struct ast *root = ast;
154 while(root != NULL && root->parent != NULL)
159 static void ast_set(struct ast *ast, uint start, uint end) {
166 /* append a child to the parent ast */
167 static struct ast *ast_append(struct rec_state *rec_state, struct lens *lens,
168 uint start, uint end) {
170 struct ast *child, *parent;
171 struct state *state = rec_state->state;
173 parent = rec_state->ast;
177 child = make_ast(lens);
178 ERR_NOMEM(child == NULL, state->info);
180 ast_set(child, start, end);
181 if (parent->nchildren >= parent->capacity) {
182 ret = REALLOC_N(parent->children, parent->capacity * 2);
183 ERR_NOMEM(ret < 0, state->info);
184 parent->capacity = parent->capacity * 2;
186 parent->children[parent->nchildren++] = child;
187 child->parent = parent;
195 /* pop the ast from one level, fail the parent is NULL */
196 static void ast_pop(struct rec_state *rec_state) {
197 ensure(rec_state->ast != NULL && rec_state->ast->parent != NULL, rec_state->state->info);
198 rec_state->ast = rec_state->ast->parent;
203 static void print_ast(const struct ast *ast, int lvl) {
208 for (i = 0; i < lvl; i++) fputs(" ", stdout);
209 lns = format_lens(ast->lens);
210 printf("%d..%d %s\n", ast->start, ast->end, lns);
212 for (i = 0; i < ast->nchildren; i++) {
213 print_ast(ast->children[i], lvl + 1);
217 static void get_error(struct state *state, struct lens *lens,
218 const char *format, ...)
219 ATTRIBUTE_FORMAT(printf, 3, 4);
221 void free_lns_error(struct lns_error *err) {
226 unref(err->lens, lens);
230 static void vget_error(struct state *state, struct lens *lens,
231 const char *format, va_list ap) {
234 if (state->error != NULL)
236 if (ALLOC(state->error) < 0)
238 state->error->lens = ref(lens);
239 if (REG_MATCHED(state))
240 state->error->pos = REG_END(state);
242 state->error->pos = 0;
243 r = vasprintf(&state->error->message, format, ap);
245 state->error->message = NULL;
248 static void get_error(struct state *state, struct lens *lens,
249 const char *format, ...)
253 va_start(ap, format);
254 vget_error(state, lens, format, ap);
258 static struct skel *make_skel(struct lens *lens) {
260 enum lens_tag tag = lens->tag;
266 void free_skel(struct skel *skel) {
269 if (skel->tag == L_CONCAT || skel->tag == L_STAR || skel->tag == L_MAYBE ||
270 skel->tag == L_SQUARE) {
271 while (skel->skels != NULL) {
272 struct skel *del = skel->skels;
273 skel->skels = del->next;
276 } else if (skel->tag == L_DEL) {
282 static void print_skel(struct skel *skel);
283 static void print_skel_list(struct skel *skels, const char *beg,
284 const char *sep, const char *end) {
286 list_for_each(s, skels) {
294 static void print_skel(struct skel *skel) {
297 if (skel->text == NULL) {
301 print_chars(stdout, skel->text, -1);
306 print_skel_list(skel->skels, "", " . ", "");
309 print_skel_list(skel->skels, "(", " ", ")*");
312 print_skel_list(skel->skels, "(", " ", ")?");
315 print_skel_list(skel->skels, "[", " ", "]");
323 // DICT_DUMP is only used for debugging
325 static void print_dict(struct dict *dict, int indent) {
326 list_for_each(d, dict) {
327 printf("%*s%s:\n", indent, "", d->key);
328 list_for_each(e, d->entry) {
329 printf("%*s", indent+2, "");
332 print_dict(e->dict, indent+2);
338 static void get_expected_error(struct state *state, struct lens *l) {
339 /* Size of the excerpt of the input text we'll show */
340 static const int wordlen = 10;
341 char word[wordlen+1];
344 if (REG_MATCHED(state))
345 strncpy(word, REG_POS(state), wordlen);
347 strncpy(word, state->text, wordlen);
348 word[wordlen] = '\0';
349 for (p = word; *p != '\0' && *p != '\n'; p++);
352 pat = escape(l->ctype->pattern->str, -1, NULL);
353 get_error(state, l, "expected %s at '%s'", pat, word);
358 * Splitting of the input text
361 static char *token(struct state *state) {
362 ensure0(REG_MATCHED(state), state->info);
363 return strndup(REG_POS(state), REG_SIZE(state));
366 static char *token_range(const char *text, uint start, uint end) {
367 return strndup(text + start, end - start);
370 static void regexp_match_error(struct state *state, struct lens *lens,
371 int count, struct regexp *r) {
373 char *pat = regexp_escape(r);
375 if (state->regs != NULL)
376 text = strndup(REG_POS(state), REG_SIZE(state));
378 text = strdup("(unknown)");
381 get_error(state, lens, "Failed to match /%s/ with %s", pat, text);
382 } else if (count == -2) {
383 get_error(state, lens, "Internal error matching /%s/ with %s",
385 } else if (count == -3) {
386 /* Should have been caught by the typechecker */
387 get_error(state, lens, "Syntax error in regexp /%s/", pat);
393 static void no_match_error(struct state *state, struct lens *lens) {
394 ensure(lens->tag == L_KEY || lens->tag == L_DEL
395 || lens->tag == L_STORE, state->info);
396 char *pat = regexp_escape(lens->ctype);
397 const char *lname = "(lname)";
398 if (lens->tag == L_KEY)
400 else if (lens->tag == L_DEL)
402 else if (lens->tag == L_STORE)
404 get_error(state, lens, "no match for %s /%s/", lname, pat);
410 /* Modifies STATE->REGS and STATE->NREG. The caller must save these
411 * if they are still needed
413 * Return the number of characters matched
415 static int match(struct state *state, struct lens *lens,
416 struct regexp *re, uint size, uint start) {
417 struct re_registers *regs;
423 count = regexp_match(re, state->text, size, start, regs);
425 regexp_match_error(state, lens, count, re);
434 static void free_regs(struct state *state) {
435 if (state->regs != NULL) {
436 free(state->regs->start);
437 free(state->regs->end);
442 static struct tree *get_lens(struct lens *lens, struct state *state);
443 static struct skel *parse_lens(struct lens *lens, struct state *state,
446 static void free_seqs(struct seq *seqs) {
447 /* Do not free seq->name; it's not owned by the seq, but by some lens */
451 static struct seq *find_seq(const char *name, struct state *state) {
452 ensure0(name != NULL, state->info);
455 for (seq=state->seqs;
456 seq != NULL && STRNEQ(seq->name, name);
463 list_append(state->seqs, seq);
469 static struct tree *get_seq(struct lens *lens, struct state *state) {
470 ensure0(lens->tag == L_SEQ, state->info);
471 struct seq *seq = find_seq(lens->string->str, state);
474 r = asprintf((char **) &(state->key), "%d", seq->value);
475 ERR_NOMEM(r < 0, state->info);
482 static struct skel *parse_seq(struct lens *lens, struct state *state) {
483 get_seq(lens, state);
484 return make_skel(lens);
487 static struct tree *get_counter(struct lens *lens, struct state *state) {
488 ensure0(lens->tag == L_COUNTER, state->info);
489 struct seq *seq = find_seq(lens->string->str, state);
494 static struct skel *parse_counter(struct lens *lens, struct state *state) {
495 get_counter(lens, state);
496 return make_skel(lens);
499 static struct tree *get_del(struct lens *lens, struct state *state) {
500 ensure0(lens->tag == L_DEL, state->info);
501 if (! REG_MATCHED(state)) {
502 char *pat = regexp_escape(lens->ctype);
503 get_error(state, lens, "no match for del /%s/", pat);
506 update_span(state->span, REG_START(state), REG_END(state));
510 static struct skel *parse_del(struct lens *lens, struct state *state) {
511 ensure0(lens->tag == L_DEL, state->info);
512 struct skel *skel = NULL;
514 skel = make_skel(lens);
515 if (! REG_MATCHED(state))
516 no_match_error(state, lens);
518 skel->text = token(state);
522 static struct tree *get_store(struct lens *lens, struct state *state) {
523 ensure0(lens->tag == L_STORE, state->info);
524 ensure0(state->value == NULL, state->info);
526 struct tree *tree = NULL;
528 if (state->value != NULL)
529 get_error(state, lens, "More than one store in a subtree");
530 else if (! REG_MATCHED(state))
531 no_match_error(state, lens);
533 state->value = token(state);
535 state->span->value_start = REG_START(state);
536 state->span->value_end = REG_END(state);
537 update_span(state->span, REG_START(state), REG_END(state));
543 static struct skel *parse_store(struct lens *lens,
544 ATTRIBUTE_UNUSED struct state *state) {
545 ensure0(lens->tag == L_STORE, state->info);
546 return make_skel(lens);
549 static struct tree *get_value(struct lens *lens, struct state *state) {
550 ensure0(lens->tag == L_VALUE, state->info);
551 state->value = strdup(lens->string->str);
555 static struct skel *parse_value(struct lens *lens,
556 ATTRIBUTE_UNUSED struct state *state) {
557 ensure0(lens->tag == L_VALUE, state->info);
558 return make_skel(lens);
561 static struct tree *get_key(struct lens *lens, struct state *state) {
562 ensure0(lens->tag == L_KEY, state->info);
563 if (! REG_MATCHED(state))
564 no_match_error(state, lens);
566 state->key = token(state);
568 state->span->label_start = REG_START(state);
569 state->span->label_end = REG_END(state);
570 update_span(state->span, REG_START(state), REG_END(state));
576 static struct skel *parse_key(struct lens *lens, struct state *state) {
577 get_key(lens, state);
578 return make_skel(lens);
581 static struct tree *get_label(struct lens *lens, struct state *state) {
582 ensure0(lens->tag == L_LABEL, state->info);
583 state->key = strdup(lens->string->str);
587 static struct skel *parse_label(struct lens *lens, struct state *state) {
588 get_label(lens, state);
589 return make_skel(lens);
592 static struct tree *get_union(struct lens *lens, struct state *state) {
593 ensure0(lens->tag == L_UNION, state->info);
595 struct tree *tree = NULL;
597 uint old_nreg = state->nreg;
600 for (int i=0; i < lens->nchildren; i++) {
601 if (REG_MATCHED(state)) {
602 tree = get_lens(lens->children[i], state);
606 state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
608 state->nreg = old_nreg;
610 get_expected_error(state, lens);
614 static struct skel *parse_union(struct lens *lens, struct state *state,
615 struct dict **dict) {
616 ensure0(lens->tag == L_UNION, state->info);
618 struct skel *skel = NULL;
620 uint old_nreg = state->nreg;
623 for (int i=0; i < lens->nchildren; i++) {
624 struct lens *l = lens->children[i];
625 if (REG_MATCHED(state)) {
626 skel = parse_lens(l, state, dict);
630 state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
632 state->nreg = old_nreg;
634 get_expected_error(state, lens);
639 static struct tree *get_concat(struct lens *lens, struct state *state) {
640 ensure0(lens->tag == L_CONCAT, state->info);
642 struct tree *tree = NULL;
643 uint old_nreg = state->nreg;
646 for (int i=0; i < lens->nchildren; i++) {
647 struct tree *t = NULL;
648 if (! REG_VALID(state)) {
649 get_error(state, lens->children[i],
650 "Not enough components in concat");
652 state->nreg = old_nreg;
656 t = get_lens(lens->children[i], state);
657 list_append(tree, t);
658 state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
660 state->nreg = old_nreg;
665 static struct skel *parse_concat(struct lens *lens, struct state *state,
666 struct dict **dict) {
667 ensure0(lens->tag == L_CONCAT, state->info);
668 struct skel *skel = make_skel(lens);
669 uint old_nreg = state->nreg;
672 for (int i=0; i < lens->nchildren; i++) {
673 struct skel *sk = NULL;
674 struct dict *di = NULL;
675 if (! REG_VALID(state)) {
676 get_error(state, lens->children[i],
677 "Not enough components in concat");
682 sk = parse_lens(lens->children[i], state, &di);
683 list_append(skel->skels, sk);
684 dict_append(dict, di);
685 state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
687 state->nreg = old_nreg;
692 /* Given a lens that does not match at the current position, try to find
693 * the left-most child LAST that does match and the lens next to it NEXT
694 * that does not match. Return the length of the match.
696 * If no such child exists, return 0
698 static int try_match(struct lens *lens, struct state *state,
699 uint start, uint end,
700 struct lens **last, struct lens **next) {
714 result = regexp_match(lens->ctype, state->text, end, start, NULL);
719 for (int i=0; i < lens->nchildren; i++) {
720 struct lens *child = lens->children[i];
721 struct lens *next_child =
722 (i < lens->nchildren - 1) ? lens->children[i+1] : NULL;
724 r = regexp_match(child->ctype, state->text, end, start, NULL);
729 } else if (result > 0) {
734 result = try_match(child, state, start, end, last, next);
735 if (result > 0 && *next == NULL)
743 for (int i=0; i < lens->nchildren; i++) {
744 struct lens *child = lens->children[i];
745 result = try_match(child, state, start, end, last, next);
755 return try_match(lens->child, state, start, end, last, next);
758 BUG_ON(true, state->info, "illegal lens tag %d", lens->tag);
765 static void short_iteration_error(struct lens *lens, struct state *state,
766 uint start, uint end) {
769 get_error(state, lens, "%s", short_iteration);
771 match_count = try_match(lens->child, state, start, end,
772 &state->error->last, &state->error->next);
773 state->error->pos = start + match_count;
776 static struct tree *get_quant_star(struct lens *lens, struct state *state) {
777 ensure0(lens->tag == L_STAR, state->info);
778 struct lens *child = lens->child;
779 struct tree *tree = NULL, *tail = NULL;
780 struct re_registers *old_regs = state->regs;
781 uint old_nreg = state->nreg;
782 uint end = REG_END(state);
783 uint start = REG_START(state);
784 uint size = end - start;
787 while (size > 0 && match(state, child, child->ctype, end, start) > 0) {
788 struct tree *t = NULL;
790 t = get_lens(lens->child, state);
791 list_tail_cons(tree, tail, t);
793 start += REG_SIZE(state);
794 size -= REG_SIZE(state);
798 state->regs = old_regs;
799 state->nreg = old_nreg;
801 short_iteration_error(lens, state, start, end);
806 static struct skel *parse_quant_star(struct lens *lens, struct state *state,
807 struct dict **dict) {
808 ensure0(lens->tag == L_STAR, state->info);
809 struct lens *child = lens->child;
810 struct skel *skel = make_skel(lens), *tail = NULL;
811 struct re_registers *old_regs = state->regs;
812 uint old_nreg = state->nreg;
813 uint end = REG_END(state);
814 uint start = REG_START(state);
815 uint size = end - start;
819 while (size > 0 && match(state, child, child->ctype, end, start) > 0) {
821 struct dict *di = NULL;
823 sk = parse_lens(lens->child, state, &di);
824 list_tail_cons(skel->skels, tail, sk);
825 dict_append(dict, di);
827 start += REG_SIZE(state);
828 size -= REG_SIZE(state);
832 state->regs = old_regs;
833 state->nreg = old_nreg;
835 get_error(state, lens, "%s", short_iteration);
840 static struct tree *get_quant_maybe(struct lens *lens, struct state *state) {
841 ensure0(lens->tag == L_MAYBE, state->info);
842 struct tree *tree = NULL;
844 /* Check that our child matched. For a construct like (r)?, the
845 * matcher will report a zero length match for group 0, even if r
846 * does not match at all
849 if (REG_MATCHED(state)) {
850 tree = get_lens(lens->child, state);
856 static struct skel *parse_quant_maybe(struct lens *lens, struct state *state,
857 struct dict **dict) {
858 ensure0(lens->tag == L_MAYBE, state->info);
860 struct skel *skel = NULL;
863 if (REG_MATCHED(state)) {
864 skel = parse_lens(lens->child, state, dict);
868 skel = make_skel(lens);
872 static struct tree *get_subtree(struct lens *lens, struct state *state) {
873 char *key = state->key;
874 char *value = state->value;
875 struct span *span = state->span;
877 struct tree *tree = NULL, *children;
881 if (state->info->flags & AUG_ENABLE_SPAN) {
882 state->span = make_span(state->info);
883 ERR_NOMEM(state->span == NULL, state->info);
886 children = get_lens(lens->child, state);
888 tree = make_tree(state->key, state->value, NULL, children);
889 ERR_NOMEM(tree == NULL, state->info);
890 tree->span = state->span;
892 if (state->span != NULL) {
893 update_span(span, state->span->span_start, state->span->span_end);
897 state->value = value;
904 static struct skel *parse_subtree(struct lens *lens, struct state *state,
905 struct dict **dict) {
906 char *key = state->key;
908 struct dict *di = NULL;
911 skel = parse_lens(lens->child, state, &di);
912 *dict = make_dict(state->key, skel, di);
914 return make_skel(lens);
917 /* Check if left and right strings matches according to the square lens
920 * Returns 1 if strings matches, 0 otherwise
922 static int square_match(struct lens *lens, char *left, char *right) {
924 struct lens *concat = NULL;
926 // if one of the argument is NULL, returns no match
927 if (left == NULL || right == NULL || lens == NULL)
930 concat = lens->child;
931 /* If either right or left lens is nocase, then ignore case */
932 if (child_first(concat)->ctype->nocase ||
933 child_last(concat)->ctype->nocase) {
934 cmp = STRCASEEQ(left, right);
936 cmp = STREQ(left, right);
942 * This function applies only for non-recursive lens, handling of recursive
943 * square is done in visit_exit().
945 static struct tree *get_square(struct lens *lens, struct state *state) {
946 ensure0(lens->tag == L_SQUARE, state->info);
948 struct lens *concat = lens->child;
949 struct tree *tree = NULL;
950 struct re_registers *old_regs = state->regs;
951 uint old_nreg = state->nreg;
952 uint end = REG_END(state);
953 uint start = REG_START(state);
954 char *rsqr = NULL, *lsqr = NULL;
957 r = match(state, lens->child, lens->child->ctype, end, start);
958 ERR_NOMEM(r < 0, state->info);
960 tree = get_lens(lens->child, state);
962 /* retrieve left component */
964 start = REG_START(state);
965 end = REG_END(state);
966 lsqr = token_range(state->text, start, end);
968 /* retrieve right component */
969 /* compute nreg for the last children */
970 for (int i = 0; i < concat->nchildren - 1; i++)
971 state->nreg += 1 + regexp_nsub(concat->children[i]->ctype);
973 start = REG_START(state);
974 end = REG_END(state);
975 rsqr = token_range(state->text, start, end);
977 if (!square_match(lens, lsqr, rsqr)) {
978 get_error(state, lens, "%s \"%s\" %s \"%s\"",
979 "Parse error: mismatched in square lens, expecting", lsqr,
986 state->nreg = old_nreg;
987 state->regs = old_regs;
998 static struct skel *parse_square(struct lens *lens, struct state *state,
999 struct dict **dict) {
1000 ensure0(lens->tag == L_SQUARE, state->info);
1001 struct re_registers *old_regs = state->regs;
1002 uint old_nreg = state->nreg;
1003 uint end = REG_END(state);
1004 uint start = REG_START(state);
1005 struct skel *skel = NULL, *sk = NULL;
1008 r = match(state, lens->child, lens->child->ctype, end, start);
1009 ERR_NOMEM(r < 0, state->info);
1011 skel = parse_lens(lens->child, state, dict);
1014 sk = make_skel(lens);
1019 state->regs = old_regs;
1020 state->nreg = old_nreg;
1025 * Helpers for recursive lenses
1029 static void print_frames(struct rec_state *state) {
1030 for (int j = state->fused - 1; j >=0; j--) {
1031 struct frame *f = state->frames + j;
1032 for (int i=0; i < state->lvl; i++) fputc(' ', stderr);
1033 fprintf(stderr, "%2d %s %s", j, f->key, f->value);
1034 if (f->tree == NULL) {
1035 fprintf(stderr, " - ");
1037 fprintf(stderr, " { %s = %s } ", f->tree->label, f->tree->value);
1039 fprintf(stderr, "%s\n", format_lens(f->lens));
1044 static struct frame *top_frame(struct rec_state *state) {
1045 ensure0(state->fsize > 0, state->state->info);
1046 return state->frames + state->fused - 1;
1049 /* The nth frame from the top of the stack, where 0th frame is the top */
1051 static struct frame *nth_frame(struct rec_state *state, uint n) {
1052 assert(state->fsize > n);
1053 return state->frames + state->fused - (n+1);
1056 static struct frame *push_frame(struct rec_state *state, struct lens *lens) {
1059 if (state->fused >= state->fsize) {
1060 uint expand = state->fsize;
1063 r = REALLOC_N(state->frames, state->fsize + expand);
1064 ERR_NOMEM(r < 0, state->state->info);
1065 state->fsize += expand;
1070 struct frame *top = top_frame(state);
1078 static struct frame *pop_frame(struct rec_state *state) {
1079 ensure0(state->fused > 0, state->state->info);
1082 if (state->fused > 0)
1083 return top_frame(state);
1088 static void dbg_visit(struct lens *lens, char action, size_t start, size_t end,
1089 int fused, int lvl) {
1091 for (int i=0; i < lvl; i++)
1093 lns = format_lens(lens);
1094 fprintf(stderr, "%c %zd..%zd %d %s\n", action, start, end,
1099 static void get_terminal(struct frame *top, struct lens *lens,
1100 struct state *state) {
1101 top->tree = get_lens(lens, state);
1102 top->key = state->key;
1103 top->value = state->value;
1105 state->value = NULL;
1108 static void parse_terminal(struct frame *top, struct lens *lens,
1109 struct state *state) {
1111 top->skel = parse_lens(lens, state, &top->dict);
1112 top->key = state->key;
1116 static void visit_terminal(struct lens *lens, size_t start, size_t end,
1118 struct rec_state *rec_state = data;
1119 struct state *state = rec_state->state;
1120 struct re_registers *old_regs = state->regs;
1122 uint old_nreg = state->nreg;
1124 if (state->error != NULL)
1127 if (debugging("cf.get"))
1128 dbg_visit(lens, 'T', start, end, rec_state->fused, rec_state->lvl);
1129 match(state, lens, lens->ctype, end, start);
1130 struct frame *top = push_frame(rec_state, lens);
1131 if (rec_state->mode == M_GET)
1132 get_terminal(top, lens, state);
1134 parse_terminal(top, lens, state);
1135 child = ast_append(rec_state, lens, start, end);
1136 ERR_NOMEM(child == NULL, state->info);
1139 state->regs = old_regs;
1140 state->nreg = old_nreg;
1143 static void visit_enter(struct lens *lens,
1144 ATTRIBUTE_UNUSED size_t start,
1145 ATTRIBUTE_UNUSED size_t end,
1147 struct rec_state *rec_state = data;
1148 struct state *state = rec_state->state;
1151 if (state->error != NULL)
1154 if (debugging("cf.get"))
1155 dbg_visit(lens, '{', start, end, rec_state->fused, rec_state->lvl);
1156 rec_state->lvl += 1;
1157 if (lens->tag == L_SUBTREE) {
1158 /* Same for parse and get */
1159 struct frame *f = push_frame(rec_state, lens);
1160 f->key = state->key;
1161 f->value = state->value;
1162 f->span = state->span;
1164 state->value = NULL;
1165 } else if (lens->tag == L_MAYBE) {
1166 push_frame(rec_state, lens);
1167 if (state->info->flags & AUG_ENABLE_SPAN) {
1168 state->span = make_span(state->info);
1169 ERR_NOMEM(state->span == NULL, state->info);
1172 child = ast_append(rec_state, lens, start, end);
1174 rec_state->ast = child;
1179 static void get_combine(struct rec_state *rec_state,
1180 struct lens *lens, uint n) {
1181 struct tree *tree = NULL, *tail = NULL;
1182 char *key = NULL, *value = NULL;
1183 struct frame *top = NULL;
1186 top = top_frame(rec_state);
1188 for (int i=0; i < n; i++, top = pop_frame(rec_state)) {
1189 list_tail_cons(tree, tail, top->tree);
1190 /* top->tree might have more than one node, update tail */
1192 while (tail->next != NULL) tail = tail->next;
1194 if (top->key != NULL) {
1195 ensure(key == NULL, rec_state->state->info);
1198 if (top->value != NULL) {
1199 ensure(value == NULL, rec_state->state->info);
1203 top = push_frame(rec_state, lens);
1211 static void parse_combine(struct rec_state *rec_state,
1212 struct lens *lens, uint n) {
1213 struct skel *skel = make_skel(lens), *tail = NULL;
1214 struct dict *dict = NULL;
1216 struct frame *top = NULL;
1219 top = top_frame(rec_state);
1221 for (int i=0; i < n; i++, top = pop_frame(rec_state)) {
1222 list_tail_cons(skel->skels, tail, top->skel);
1223 /* top->skel might have more than one node, update skel */
1225 while (tail->next != NULL) tail = tail->next;
1226 dict_append(&dict, top->dict);
1227 if (top->key != NULL) {
1228 ensure(key == NULL, rec_state->state->info);
1232 top = push_frame(rec_state, lens);
1240 static void visit_exit(struct lens *lens,
1241 ATTRIBUTE_UNUSED size_t start,
1242 ATTRIBUTE_UNUSED size_t end,
1244 struct rec_state *rec_state = data;
1245 struct state *state = rec_state->state;
1247 if (state->error != NULL)
1250 rec_state->lvl -= 1;
1251 if (debugging("cf.get"))
1252 dbg_visit(lens, '}', start, end, rec_state->fused, rec_state->lvl);
1254 ERR_BAIL(lens->info);
1256 if (lens->tag == L_SUBTREE) {
1257 struct frame *top = top_frame(rec_state);
1258 if (rec_state->mode == M_GET) {
1260 // FIXME: tree may leak if pop_frame ensure0 fail
1261 tree = make_tree(top->key, top->value, NULL, top->tree);
1262 tree->span = state->span;
1263 ERR_NOMEM(tree == NULL, lens->info);
1264 top = pop_frame(rec_state);
1265 ensure(lens == top->lens, state->info);
1266 state->key = top->key;
1267 state->value = top->value;
1268 state->span = top->span;
1269 pop_frame(rec_state);
1270 top = push_frame(rec_state, lens);
1275 skel = make_skel(lens);
1276 ERR_NOMEM(skel == NULL, lens->info);
1277 dict = make_dict(top->key, top->skel, top->dict);
1278 ERR_NOMEM(dict == NULL, lens->info);
1279 top = pop_frame(rec_state);
1280 ensure(lens == top->lens, state->info);
1281 state->key = top->key;
1282 pop_frame(rec_state);
1283 top = push_frame(rec_state, lens);
1287 } else if (lens->tag == L_CONCAT) {
1288 ensure(rec_state->fused >= lens->nchildren, state->info);
1289 for (int i = 0; i < lens->nchildren; i++) {
1290 struct frame *fr = nth_frame(rec_state, i);
1291 BUG_ON(lens->children[i] != fr->lens,
1293 "Unexpected lens in concat %zd..%zd\n Expected: %s\n Actual: %s",
1295 format_lens(lens->children[i]),
1296 format_lens(fr->lens));
1298 if (rec_state->mode == M_GET)
1299 get_combine(rec_state, lens, lens->nchildren);
1301 parse_combine(rec_state, lens, lens->nchildren);
1302 } else if (lens->tag == L_STAR) {
1304 while (n < rec_state->fused &&
1305 nth_frame(rec_state, n)->lens == lens->child)
1307 if (rec_state->mode == M_GET)
1308 get_combine(rec_state, lens, n);
1310 parse_combine(rec_state, lens, n);
1311 } else if (lens->tag == L_MAYBE) {
1313 if (rec_state->fused > 0
1314 && top_frame(rec_state)->lens == lens->child) {
1317 if (rec_state->mode == M_GET)
1318 get_combine(rec_state, lens, n);
1320 parse_combine(rec_state, lens, n);
1321 } else if (lens->tag == L_SQUARE) {
1322 if (rec_state->mode == M_GET) {
1323 struct ast *square, *concat, *right, *left;
1327 square = rec_state->ast;
1328 concat = child_first(square);
1329 right = child_first(concat);
1330 left = child_last(concat);
1331 lsqr = token_range(state->text, left->start, left->end);
1332 rsqr = token_range(state->text, right->start, right->end);
1333 ret = square_match(lens, lsqr, rsqr);
1335 get_error(state, lens, "%s \"%s\" %s \"%s\"",
1336 "Parse error: mismatched in square lens, expecting", lsqr,
1343 get_combine(rec_state, lens, 1);
1345 parse_combine(rec_state, lens, 1);
1348 top_frame(rec_state)->lens = lens;
1355 static void visit_error(struct lens *lens, void *data, size_t pos,
1356 const char *format, ...) {
1357 struct rec_state *rec_state = data;
1360 va_start(ap, format);
1361 vget_error(rec_state->state, lens, format, ap);
1363 rec_state->state->error->pos = rec_state->start + pos;
1366 static struct frame *rec_process(enum mode_t mode, struct lens *lens,
1367 struct state *state) {
1368 uint end = REG_END(state);
1369 uint start = REG_START(state);
1371 struct re_registers *old_regs = state->regs;
1372 uint old_nreg = state->nreg;
1374 struct jmt_visitor visitor;
1375 struct rec_state rec_state;
1377 struct frame *f = NULL;
1379 MEMZERO(&rec_state, 1);
1380 MEMZERO(&visitor, 1);
1382 if (lens->jmt == NULL) {
1383 lens->jmt = jmt_build(lens);
1384 ERR_BAIL(lens->info);
1390 rec_state.mode = mode;
1391 rec_state.state = state;
1392 rec_state.fused = 0;
1394 rec_state.start = start;
1395 rec_state.ast = make_ast(lens);
1396 ERR_NOMEM(rec_state.ast == NULL, state->info);
1398 visitor.parse = jmt_parse(lens->jmt, state->text + start, end - start);
1399 ERR_BAIL(lens->info);
1400 visitor.terminal = visit_terminal;
1401 visitor.enter = visit_enter;
1402 visitor.exit = visit_exit;
1403 visitor.error = visit_error;
1404 visitor.data = &rec_state;
1405 r = jmt_visit(&visitor, &len);
1406 ERR_BAIL(lens->info);
1408 get_error(state, lens, "Syntax error");
1409 state->error->pos = start + len;
1411 if (rec_state.fused == 0) {
1412 get_error(state, lens,
1413 "Parse did not leave a result on the stack");
1415 } else if (rec_state.fused > 1) {
1416 get_error(state, lens,
1417 "Parse left additional garbage on the stack");
1421 rec_state.ast = ast_root(rec_state.ast);
1422 ensure(rec_state.ast->parent == NULL, state->info);
1424 if (debugging("cf.get.ast"))
1425 print_ast(ast_root(rec_state.ast), 0);
1426 state->regs = old_regs;
1427 state->nreg = old_nreg;
1428 jmt_free_parse(visitor.parse);
1429 free_ast(ast_root(rec_state.ast));
1430 return rec_state.frames;
1433 for(i = 0; i < rec_state.fused; i++) {
1434 f = nth_frame(&rec_state, i);
1436 if (mode == M_GET) {
1439 } else if (mode == M_PARSE) {
1444 FREE(rec_state.frames);
1448 static struct tree *get_rec(struct lens *lens, struct state *state) {
1450 struct tree *tree = NULL;
1452 fr = rec_process(M_GET, lens, state);
1455 state->key = fr->key;
1456 state->value = fr->value;
1462 static struct skel *parse_rec(struct lens *lens, struct state *state,
1463 struct dict **dict) {
1464 struct skel *skel = NULL;
1467 fr = rec_process(M_PARSE, lens, state);
1471 state->key = fr->key;
1477 static struct tree *get_lens(struct lens *lens, struct state *state) {
1478 struct tree *tree = NULL;
1482 tree = get_del(lens, state);
1485 tree = get_store(lens, state);
1488 tree = get_value(lens, state);
1491 tree = get_key(lens, state);
1494 tree = get_label(lens, state);
1497 tree = get_seq(lens, state);
1500 tree = get_counter(lens, state);
1503 tree = get_concat(lens, state);
1506 tree = get_union(lens, state);
1509 tree = get_subtree(lens, state);
1512 tree = get_quant_star(lens, state);
1515 tree = get_quant_maybe(lens, state);
1518 tree = get_square(lens, state);
1521 BUG_ON(true, state->info, "illegal lens tag %d", lens->tag);
1528 /* Initialize registers. Return 0 if the lens matches the entire text, 1 if
1529 * it does not and -1 on error.
1531 static int init_regs(struct state *state, struct lens *lens, uint size) {
1534 if (lens->tag != L_STAR && ! lens->recursive) {
1535 r = match(state, lens, lens->ctype, size, 0);
1537 get_error(state, lens, "Input string does not match at all");
1542 /* Special case the very common situation that the lens is (l)*
1543 * We can avoid matching the entire text in that case - that
1544 * match can be very expensive
1546 if (ALLOC(state->regs) < 0)
1548 state->regs->num_regs = 1;
1549 if (ALLOC(state->regs->start) < 0 || ALLOC(state->regs->end) < 0)
1551 state->regs->start[0] = 0;
1552 state->regs->end[0] = size;
1556 struct tree *lns_get(struct info *info, struct lens *lens, const char *text,
1557 struct lns_error **err) {
1559 struct tree *tree = NULL;
1560 uint size = strlen(text);
1564 r = ALLOC(state.info);
1565 ERR_NOMEM(r < 0, info);
1567 *state.info = *info;
1568 state.info->ref = UINT_MAX;
1572 /* We are probably being overly cautious here: if the lens can't process
1573 * all of TEXT, we should really fail somewhere in one of the sublenses.
1574 * But to be safe, we check that we can process everything anyway, then
1575 * try to process, hoping we'll get a more specific error, and if that
1576 * fails, we throw our arms in the air and say 'something went wrong'
1578 partial = init_regs(&state, lens, size);
1580 if (lens->recursive)
1581 tree = get_rec(lens, &state);
1583 tree = get_lens(lens, &state);
1586 free_seqs(state.seqs);
1587 if (state.key != NULL) {
1588 get_error(&state, lens, "get left unused key %s", state.key);
1591 if (state.value != NULL) {
1592 get_error(&state, lens, "get left unused value %s", state.value);
1595 if (partial && state.error == NULL) {
1596 get_error(&state, lens, "Get did not match entire input");
1606 if (state.error != NULL) {
1610 free_lns_error(state.error);
1615 static struct skel *parse_lens(struct lens *lens, struct state *state,
1616 struct dict **dict) {
1617 struct skel *skel = NULL;
1621 skel = parse_del(lens, state);
1624 skel = parse_store(lens, state);
1627 skel = parse_value(lens, state);
1630 skel = parse_key(lens, state);
1633 skel = parse_label(lens, state);
1636 skel = parse_seq(lens, state);
1639 skel = parse_counter(lens, state);
1642 skel = parse_concat(lens, state, dict);
1645 skel = parse_union(lens, state, dict);
1648 skel = parse_subtree(lens, state, dict);
1651 skel = parse_quant_star(lens, state, dict);
1654 skel = parse_quant_maybe(lens, state, dict);
1657 skel = parse_square(lens, state, dict);
1660 BUG_ON(true, state->info, "illegal lens tag %d", lens->tag);
1667 struct skel *lns_parse(struct lens *lens, const char *text, struct dict **dict,
1668 struct lns_error **err) {
1670 struct skel *skel = NULL;
1671 uint size = strlen(text);
1675 r = ALLOC(state.info);
1676 ERR_NOMEM(r< 0, lens->info);
1677 state.info->ref = UINT_MAX;
1678 state.info->error = lens->info->error;
1683 partial = init_regs(&state, lens, size);
1686 if (lens->recursive)
1687 skel = parse_rec(lens, &state, dict);
1689 skel = parse_lens(lens, &state, dict);
1691 free_seqs(state.seqs);
1692 if (state.error != NULL) {
1698 if (state.key != NULL) {
1699 get_error(&state, lens, "parse left unused key %s", state.key);
1702 if (state.value != NULL) {
1703 get_error(&state, lens, "parse left unused value %s", state.value);
1707 // This should never happen during lns_parse
1708 get_error(&state, lens, "parse can not process entire input");
1717 free_lns_error(state.error);
1724 * indent-tabs-mode: nil