2 * parser.c: parse a configuration file according to a grammar
4 * Copyright (C) 2007-2016 David Lutterkort
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * Author: David Lutterkort <dlutter@redhat.com>
36 /* Our favorite error message */
37 static const char *const short_iteration =
38 "Iterated lens matched less than it should";
52 char *value; /* GET_STORE leaves a value here */
53 struct lns_error *error;
55 /* We use the registers from a regular expression match to keep track
56 * of the substring we are currently looking at. REGS are the registers
57 * from the last regexp match; NREG is the number of the register
58 * in REGS that describes the substring we are currently looking at.
60 * We adjust NREG as we traverse lenses either because we know the
61 * grouping structure of lenses (for L_STAR, the child lens is always
62 * in NREG + 1) or by looking at the number of groups in a sublens (as
63 * we move from one child of L_CONCAT to the next, we need to add 1 +
64 * number of groups of that child to NREG) How NREG is adjusted is
65 * closely related to how the REGEXP_* routines build up bigger regexps
68 struct re_registers *regs;
72 /* Used by recursive lenses to stack intermediate results
73 Frames are used for a few different purposes:
75 (1) to save results from individual lenses that are later gathered and
76 combined by combinators like L_STAR and L_CONCAT
77 (2) to preserve parse state when descending into a L_SUBTREE
78 (3) as a marker to determine if a L_MAYBE had a match or not
89 struct { /* M_PARSE */
96 /* Used by recursive lenses in get_rec and parse_rec */
97 enum mode_t { M_GET, M_PARSE };
99 /* Abstract Syntax Tree for recursive parse */
102 struct ast **children;
115 struct frame *frames;
117 uint lvl; /* Debug only */
119 /* Will either be get_combine or parse_combine, depending on MODE, for
120 the duration of the whole recursive parse */
121 void (*combine)(struct rec_state *, struct lens *, uint);
124 #define REG_START(state) ((state)->regs->start[(state)->nreg])
125 #define REG_END(state) ((state)->regs->end[(state)->nreg])
126 #define REG_SIZE(state) (REG_END(state) - REG_START(state))
127 #define REG_POS(state) ((state)->text + REG_START(state))
128 #define REG_VALID(state) ((state)->regs != NULL && \
129 (state)->nreg < (state)->regs->num_regs)
130 #define REG_MATCHED(state) (REG_VALID(state) \
131 && (state)->regs->start[(state)->nreg] >= 0)
133 /* The macros SAVE_REGS and RESTORE_REGS are used to remember and restore
134 * where our caller was in processing their regexp match before we do
135 * matching ourselves (and would therefore clobber the caller's state)
137 * These two macros are admittedly horrible in that they declare local
138 * variables called OLD_REGS and OLD_NREG. The alternative to avoiding that
139 * would be to manually manage a stack of regs/nreg in STATE.
141 #define SAVE_REGS(state) \
142 struct re_registers *old_regs = state->regs; \
143 uint old_nreg = state->nreg; \
144 state->regs = NULL; \
147 #define RESTORE_REGS(state) \
149 state->regs = old_regs; \
150 state->nreg = old_nreg;
155 static struct ast *make_ast(struct lens *lens) {
156 struct ast *ast = NULL;
162 if (ALLOC_N(ast->children, ast->capacity) < 0) {
169 /* recursively free the node and all it's descendants */
170 static void free_ast(struct ast *ast) {
174 for (i = 0; i < ast->nchildren; i++) {
175 free_ast(ast->children[i]);
177 if (ast->children != NULL)
182 static struct ast *ast_root(struct ast *ast) {
183 struct ast *root = ast;
184 while(root != NULL && root->parent != NULL)
189 static void ast_set(struct ast *ast, uint start, uint end) {
196 /* append a child to the parent ast */
197 static struct ast *ast_append(struct rec_state *rec_state, struct lens *lens,
198 uint start, uint end) {
200 struct ast *child, *parent;
201 struct state *state = rec_state->state;
203 parent = rec_state->ast;
207 child = make_ast(lens);
208 ERR_NOMEM(child == NULL, state->info);
210 ast_set(child, start, end);
211 if (parent->nchildren >= parent->capacity) {
212 ret = REALLOC_N(parent->children, parent->capacity * 2);
213 ERR_NOMEM(ret < 0, state->info);
214 parent->capacity = parent->capacity * 2;
216 parent->children[parent->nchildren++] = child;
217 child->parent = parent;
225 /* pop the ast from one level, fail the parent is NULL */
226 static void ast_pop(struct rec_state *rec_state) {
227 ensure(rec_state->ast != NULL && rec_state->ast->parent != NULL, rec_state->state->info);
228 rec_state->ast = rec_state->ast->parent;
233 static void print_ast(const struct ast *ast, int lvl) {
238 for (i = 0; i < lvl; i++) fputs(" ", stdout);
239 lns = format_lens(ast->lens);
240 printf("%d..%d %s\n", ast->start, ast->end, lns);
242 for (i = 0; i < ast->nchildren; i++) {
243 print_ast(ast->children[i], lvl + 1);
247 static void get_error(struct state *state, struct lens *lens,
248 const char *format, ...)
249 ATTRIBUTE_FORMAT(printf, 3, 4);
251 void free_lns_error(struct lns_error *err) {
256 unref(err->lens, lens);
260 static void vget_error(struct state *state, struct lens *lens,
261 const char *format, va_list ap) {
264 if (state->error != NULL)
266 if (ALLOC(state->error) < 0)
268 state->error->lens = ref(lens);
269 if (REG_MATCHED(state))
270 state->error->pos = REG_END(state);
272 state->error->pos = 0;
273 r = vasprintf(&state->error->message, format, ap);
275 state->error->message = NULL;
278 static void get_error(struct state *state, struct lens *lens,
279 const char *format, ...)
283 va_start(ap, format);
284 vget_error(state, lens, format, ap);
288 static struct skel *make_skel(struct lens *lens) {
290 enum lens_tag tag = lens->tag;
297 void free_skel(struct skel *skel) {
300 if (skel->tag == L_CONCAT || skel->tag == L_STAR || skel->tag == L_MAYBE ||
301 skel->tag == L_SQUARE) {
302 while (skel->skels != NULL) {
303 struct skel *del = skel->skels;
304 skel->skels = del->next;
307 } else if (skel->tag == L_DEL) {
313 static void print_skel(struct skel *skel);
314 static void print_skel_list(struct skel *skels, const char *beg,
315 const char *sep, const char *end) {
317 list_for_each(s, skels) {
325 static void print_skel(struct skel *skel) {
328 if (skel->text == NULL) {
332 print_chars(stdout, skel->text, -1);
337 print_skel_list(skel->skels, "", " . ", "");
340 print_skel_list(skel->skels, "(", " ", ")*");
343 print_skel_list(skel->skels, "(", " ", ")?");
346 print_skel_list(skel->skels, "[", " ", "]");
354 // DICT_DUMP is only used for debugging
356 static void print_dict(struct dict *dict, int indent) {
357 list_for_each(d, dict) {
358 printf("%*s%s:\n", indent, "", d->key);
359 list_for_each(e, d->entry) {
360 printf("%*s", indent+2, "");
363 print_dict(e->dict, indent+2);
369 static void get_expected_error(struct state *state, struct lens *l) {
370 /* Size of the excerpt of the input text we'll show */
371 static const int wordlen = 10;
372 char word[wordlen+1];
375 if (REG_MATCHED(state))
376 strncpy(word, REG_POS(state), wordlen);
378 strncpy(word, state->text, wordlen);
379 word[wordlen] = '\0';
380 for (p = word; *p != '\0' && *p != '\n'; p++);
383 pat = escape(l->ctype->pattern->str, -1, NULL);
384 get_error(state, l, "expected %s at '%s'", pat, word);
389 * Splitting of the input text
392 static char *token(struct state *state) {
393 ensure0(REG_MATCHED(state), state->info);
394 return strndup(REG_POS(state), REG_SIZE(state));
397 static char *token_range(const char *text, uint start, uint end) {
398 return strndup(text + start, end - start);
401 static void regexp_match_error(struct state *state, struct lens *lens,
402 int count, struct regexp *r) {
404 char *pat = regexp_escape(r);
406 if (state->regs != NULL)
407 text = strndup(REG_POS(state), REG_SIZE(state));
409 text = strdup("(unknown)");
412 get_error(state, lens, "Failed to match /%s/ with %s", pat, text);
413 } else if (count == -2) {
414 get_error(state, lens, "Internal error matching /%s/ with %s",
416 } else if (count == -3) {
417 /* Should have been caught by the typechecker */
418 get_error(state, lens, "Syntax error in regexp /%s/", pat);
424 static void no_match_error(struct state *state, struct lens *lens) {
425 ensure(lens->tag == L_KEY || lens->tag == L_DEL
426 || lens->tag == L_STORE, state->info);
427 char *pat = regexp_escape(lens->ctype);
428 const char *lname = "(lname)";
429 if (lens->tag == L_KEY)
431 else if (lens->tag == L_DEL)
433 else if (lens->tag == L_STORE)
435 get_error(state, lens, "no match for %s /%s/", lname, pat);
441 /* Modifies STATE->REGS and STATE->NREG. The caller must save these
442 * if they are still needed
444 * Return the number of characters matched
446 static int match(struct state *state, struct lens *lens,
447 struct regexp *re, uint size, uint start) {
448 struct re_registers *regs;
454 count = regexp_match(re, state->text, size, start, regs);
456 regexp_match_error(state, lens, count, re);
465 static void free_regs(struct state *state) {
466 if (state->regs != NULL) {
467 free(state->regs->start);
468 free(state->regs->end);
473 static struct tree *get_lens(struct lens *lens, struct state *state);
474 static struct skel *parse_lens(struct lens *lens, struct state *state,
477 static void free_seqs(struct seq *seqs) {
478 /* Do not free seq->name; it's not owned by the seq, but by some lens */
482 static struct seq *find_seq(const char *name, struct state *state) {
483 ensure0(name != NULL, state->info);
486 for (seq=state->seqs;
487 seq != NULL && STRNEQ(seq->name, name);
495 list_append(state->seqs, seq);
501 static struct tree *get_seq(struct lens *lens, struct state *state) {
502 ensure0(lens->tag == L_SEQ, state->info);
503 struct seq *seq = find_seq(lens->string->str, state);
506 r = asprintf((char **) &(state->key), "%d", seq->value);
507 ERR_NOMEM(r < 0, state->info);
514 static struct skel *parse_seq(struct lens *lens, struct state *state) {
515 get_seq(lens, state);
516 return make_skel(lens);
519 static struct tree *get_counter(struct lens *lens, struct state *state) {
520 ensure0(lens->tag == L_COUNTER, state->info);
521 struct seq *seq = find_seq(lens->string->str, state);
526 static struct skel *parse_counter(struct lens *lens, struct state *state) {
527 get_counter(lens, state);
528 return make_skel(lens);
531 static struct tree *get_del(struct lens *lens, struct state *state) {
532 ensure0(lens->tag == L_DEL, state->info);
533 if (! REG_MATCHED(state)) {
534 char *pat = regexp_escape(lens->ctype);
535 get_error(state, lens, "no match for del /%s/", pat);
538 update_span(state->span, REG_START(state), REG_END(state));
542 static struct skel *parse_del(struct lens *lens, struct state *state) {
543 ensure0(lens->tag == L_DEL, state->info);
544 struct skel *skel = NULL;
546 skel = make_skel(lens);
547 if (! REG_MATCHED(state))
548 no_match_error(state, lens);
550 skel->text = token(state);
554 static struct tree *get_store(struct lens *lens, struct state *state) {
555 ensure0(lens->tag == L_STORE, state->info);
556 ensure0(state->value == NULL, state->info);
558 struct tree *tree = NULL;
560 if (state->value != NULL)
561 get_error(state, lens, "More than one store in a subtree");
562 else if (! REG_MATCHED(state))
563 no_match_error(state, lens);
565 state->value = token(state);
567 state->span->value_start = REG_START(state);
568 state->span->value_end = REG_END(state);
569 update_span(state->span, REG_START(state), REG_END(state));
575 static struct skel *parse_store(struct lens *lens,
576 ATTRIBUTE_UNUSED struct state *state) {
577 ensure0(lens->tag == L_STORE, state->info);
578 return make_skel(lens);
581 static struct tree *get_value(struct lens *lens, struct state *state) {
582 ensure0(lens->tag == L_VALUE, state->info);
583 state->value = strdup(lens->string->str);
587 static struct skel *parse_value(struct lens *lens,
588 ATTRIBUTE_UNUSED struct state *state) {
589 ensure0(lens->tag == L_VALUE, state->info);
590 return make_skel(lens);
593 static struct tree *get_key(struct lens *lens, struct state *state) {
594 ensure0(lens->tag == L_KEY, state->info);
595 if (! REG_MATCHED(state))
596 no_match_error(state, lens);
598 state->key = token(state);
600 state->span->label_start = REG_START(state);
601 state->span->label_end = REG_END(state);
602 update_span(state->span, REG_START(state), REG_END(state));
608 static struct skel *parse_key(struct lens *lens, struct state *state) {
609 get_key(lens, state);
610 return make_skel(lens);
613 static struct tree *get_label(struct lens *lens, struct state *state) {
614 ensure0(lens->tag == L_LABEL, state->info);
615 state->key = strdup(lens->string->str);
619 static struct skel *parse_label(struct lens *lens, struct state *state) {
620 get_label(lens, state);
621 return make_skel(lens);
624 static struct tree *get_union(struct lens *lens, struct state *state) {
625 ensure0(lens->tag == L_UNION, state->info);
627 struct tree *tree = NULL;
629 uint old_nreg = state->nreg;
632 for (int i=0; i < lens->nchildren; i++) {
633 if (REG_MATCHED(state)) {
634 tree = get_lens(lens->children[i], state);
638 state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
640 state->nreg = old_nreg;
642 get_expected_error(state, lens);
646 static struct skel *parse_union(struct lens *lens, struct state *state,
647 struct dict **dict) {
648 ensure0(lens->tag == L_UNION, state->info);
650 struct skel *skel = NULL;
652 uint old_nreg = state->nreg;
655 for (int i=0; i < lens->nchildren; i++) {
656 struct lens *l = lens->children[i];
657 if (REG_MATCHED(state)) {
658 skel = parse_lens(l, state, dict);
662 state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
664 state->nreg = old_nreg;
666 get_expected_error(state, lens);
671 static struct tree *get_concat(struct lens *lens, struct state *state) {
672 ensure0(lens->tag == L_CONCAT, state->info);
674 struct tree *tree = NULL;
675 uint old_nreg = state->nreg;
678 for (int i=0; i < lens->nchildren; i++) {
679 struct tree *t = NULL;
680 if (! REG_VALID(state)) {
681 get_error(state, lens->children[i],
682 "Not enough components in concat");
684 state->nreg = old_nreg;
688 t = get_lens(lens->children[i], state);
689 list_append(tree, t);
690 state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
692 state->nreg = old_nreg;
697 static struct skel *parse_concat(struct lens *lens, struct state *state,
698 struct dict **dict) {
699 ensure0(lens->tag == L_CONCAT, state->info);
700 struct skel *skel = make_skel(lens);
701 uint old_nreg = state->nreg;
704 for (int i=0; i < lens->nchildren; i++) {
705 struct skel *sk = NULL;
706 struct dict *di = NULL;
707 if (! REG_VALID(state)) {
708 get_error(state, lens->children[i],
709 "Not enough components in concat");
714 sk = parse_lens(lens->children[i], state, &di);
715 list_append(skel->skels, sk);
716 dict_append(dict, di);
717 state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
719 state->nreg = old_nreg;
724 /* Given a lens that does not match at the current position, try to find
725 * the left-most child LAST that does match and the lens next to it NEXT
726 * that does not match. Return the length of the match.
728 * If no such child exists, return 0
730 static int try_match(struct lens *lens, struct state *state,
731 uint start, uint end,
732 struct lens **last, struct lens **next) {
746 result = regexp_match(lens->ctype, state->text, end, start, NULL);
751 for (int i=0; i < lens->nchildren; i++) {
752 struct lens *child = lens->children[i];
753 struct lens *next_child =
754 (i < lens->nchildren - 1) ? lens->children[i+1] : NULL;
756 r = regexp_match(child->ctype, state->text, end, start, NULL);
761 } else if (result > 0) {
766 result = try_match(child, state, start, end, last, next);
767 if (result > 0 && *next == NULL)
775 for (int i=0; i < lens->nchildren; i++) {
776 struct lens *child = lens->children[i];
777 result = try_match(child, state, start, end, last, next);
787 return try_match(lens->child, state, start, end, last, next);
790 BUG_ON(true, state->info, "illegal lens tag %d", lens->tag);
797 static void short_iteration_error(struct lens *lens, struct state *state,
798 uint start, uint end) {
801 get_error(state, lens, "%s", short_iteration);
803 match_count = try_match(lens->child, state, start, end,
804 &state->error->last, &state->error->next);
805 state->error->pos = start + match_count;
808 static struct tree *get_quant_star(struct lens *lens, struct state *state) {
809 ensure0(lens->tag == L_STAR, state->info);
810 struct lens *child = lens->child;
811 struct tree *tree = NULL, *tail = NULL;
812 uint end = REG_END(state);
813 uint start = REG_START(state);
814 uint size = end - start;
817 while (size > 0 && match(state, child, child->ctype, end, start) > 0) {
818 struct tree *t = NULL;
820 t = get_lens(lens->child, state);
821 list_tail_cons(tree, tail, t);
823 start += REG_SIZE(state);
824 size -= REG_SIZE(state);
829 short_iteration_error(lens, state, start, end);
834 static struct skel *parse_quant_star(struct lens *lens, struct state *state,
835 struct dict **dict) {
836 ensure0(lens->tag == L_STAR, state->info);
837 struct lens *child = lens->child;
838 struct skel *skel = make_skel(lens), *tail = NULL;
839 uint end = REG_END(state);
840 uint start = REG_START(state);
841 uint size = end - start;
845 while (size > 0 && match(state, child, child->ctype, end, start) > 0) {
847 struct dict *di = NULL;
849 sk = parse_lens(lens->child, state, &di);
850 list_tail_cons(skel->skels, tail, sk);
851 dict_append(dict, di);
853 start += REG_SIZE(state);
854 size -= REG_SIZE(state);
859 get_error(state, lens, "%s", short_iteration);
864 static struct tree *get_quant_maybe(struct lens *lens, struct state *state) {
865 ensure0(lens->tag == L_MAYBE, state->info);
866 struct tree *tree = NULL;
868 /* Check that our child matched. For a construct like (r)?, the
869 * matcher will report a zero length match for group 0, even if r
870 * does not match at all
873 if (REG_MATCHED(state)) {
874 tree = get_lens(lens->child, state);
880 static struct skel *parse_quant_maybe(struct lens *lens, struct state *state,
881 struct dict **dict) {
882 ensure0(lens->tag == L_MAYBE, state->info);
884 struct skel *skel = NULL;
887 if (REG_MATCHED(state)) {
888 skel = parse_lens(lens->child, state, dict);
892 skel = make_skel(lens);
896 static struct tree *get_subtree(struct lens *lens, struct state *state) {
897 char *key = state->key;
898 char *value = state->value;
899 struct span *span = move(state->span);
901 struct tree *tree = NULL, *children;
905 if (state->enable_span) {
906 state->span = make_span(state->info);
907 ERR_NOMEM(state->span == NULL, state->info);
910 children = get_lens(lens->child, state);
912 tree = make_tree(state->key, state->value, NULL, children);
913 ERR_NOMEM(tree == NULL, state->info);
914 tree->span = move(state->span);
916 if (tree->span != NULL) {
917 update_span(span, tree->span->span_start, tree->span->span_end);
921 state->value = value;
925 free_span(state->span);
930 static struct skel *parse_subtree(struct lens *lens, struct state *state,
931 struct dict **dict) {
932 char *key = state->key;
934 struct dict *di = NULL;
937 skel = parse_lens(lens->child, state, &di);
938 *dict = make_dict(state->key, skel, di);
940 return make_skel(lens);
943 /* Check if left and right strings matches according to the square lens
946 * Returns 1 if strings matches, 0 otherwise
948 static int square_match(struct lens *lens, char *left, char *right) {
950 struct lens *concat = NULL;
952 // if one of the argument is NULL, returns no match
953 if (left == NULL || right == NULL || lens == NULL)
956 concat = lens->child;
957 /* If either right or left lens is nocase, then ignore case */
958 if (child_first(concat)->ctype->nocase ||
959 child_last(concat)->ctype->nocase) {
960 cmp = STRCASEEQ(left, right);
962 cmp = STREQ(left, right);
968 * This function applies only for non-recursive lens, handling of recursive
969 * square is done in visit_exit().
971 static struct tree *get_square(struct lens *lens, struct state *state) {
972 ensure0(lens->tag == L_SQUARE, state->info);
974 struct lens *concat = lens->child;
975 struct tree *tree = NULL;
976 uint end = REG_END(state);
977 uint start = REG_START(state);
978 char *rsqr = NULL, *lsqr = NULL;
982 r = match(state, lens->child, lens->child->ctype, end, start);
983 ERR_NOMEM(r < 0, state->info);
985 tree = get_lens(lens->child, state);
987 /* retrieve left component */
989 start = REG_START(state);
990 end = REG_END(state);
991 lsqr = token_range(state->text, start, end);
993 /* retrieve right component */
994 /* compute nreg for the last children */
995 for (int i = 0; i < concat->nchildren - 1; i++)
996 state->nreg += 1 + regexp_nsub(concat->children[i]->ctype);
998 start = REG_START(state);
999 end = REG_END(state);
1000 rsqr = token_range(state->text, start, end);
1002 if (!square_match(lens, lsqr, rsqr)) {
1003 get_error(state, lens, "%s \"%s\" %s \"%s\"",
1004 "Parse error: mismatched in square lens, expecting", lsqr,
1010 RESTORE_REGS(state);
1021 static struct skel *parse_square(struct lens *lens, struct state *state,
1022 struct dict **dict) {
1023 ensure0(lens->tag == L_SQUARE, state->info);
1024 uint end = REG_END(state);
1025 uint start = REG_START(state);
1026 struct skel *skel = NULL, *sk = NULL;
1030 r = match(state, lens->child, lens->child->ctype, end, start);
1031 ERR_NOMEM(r < 0, state->info);
1033 skel = parse_lens(lens->child, state, dict);
1036 sk = make_skel(lens);
1040 RESTORE_REGS(state);
1045 * Helpers for recursive lenses
1049 static void print_frames(struct rec_state *state) {
1050 for (int j = state->fused - 1; j >=0; j--) {
1051 struct frame *f = state->frames + j;
1052 for (int i=0; i < state->lvl; i++) fputc(' ', stderr);
1053 fprintf(stderr, "%2d %s %s", j, f->key, f->value);
1054 if (f->tree == NULL) {
1055 fprintf(stderr, " - ");
1057 fprintf(stderr, " { %s = %s } ", f->tree->label, f->tree->value);
1059 char *s = format_lens(f->lens);
1061 fprintf(stderr, "%s\n", s);
1068 static struct frame *top_frame(struct rec_state *state) {
1069 ensure0(state->fsize > 0, state->state->info);
1070 return state->frames + state->fused - 1;
1073 /* The nth frame from the top of the stack, where 0th frame is the top */
1075 static struct frame *nth_frame(struct rec_state *state, uint n) {
1076 ensure0(state->fsize > n, state->state->info);
1077 return state->frames + state->fused - (n+1);
1080 static struct frame *push_frame(struct rec_state *state, struct lens *lens) {
1083 if (state->fused >= state->fsize) {
1084 uint expand = state->fsize;
1087 r = REALLOC_N(state->frames, state->fsize + expand);
1088 ERR_NOMEM(r < 0, state->state->info);
1089 state->fsize += expand;
1094 struct frame *top = top_frame(state);
1102 static struct frame *pop_frame(struct rec_state *state) {
1103 ensure0(state->fused > 0, state->state->info);
1105 struct frame *result = top_frame(state);
1110 static void dbg_visit(struct lens *lens, char action, size_t start, size_t end,
1111 int fused, int lvl) {
1113 for (int i=0; i < lvl; i++)
1115 lns = format_lens(lens);
1116 fprintf(stderr, "%c %zd..%zd %d %s\n", action, start, end,
1121 static void get_terminal(struct frame *top, struct lens *lens,
1122 struct state *state) {
1123 top->tree = get_lens(lens, state);
1124 top->key = state->key;
1125 top->value = state->value;
1127 state->value = NULL;
1130 static void parse_terminal(struct frame *top, struct lens *lens,
1131 struct state *state) {
1133 top->skel = parse_lens(lens, state, &top->dict);
1134 top->key = state->key;
1138 static void visit_terminal(struct lens *lens, size_t start, size_t end,
1140 struct rec_state *rec_state = data;
1141 struct state *state = rec_state->state;
1144 if (state->error != NULL)
1148 if (debugging("cf.get"))
1149 dbg_visit(lens, 'T', start, end, rec_state->fused, rec_state->lvl);
1150 match(state, lens, lens->ctype, end, start);
1151 struct frame *top = push_frame(rec_state, lens);
1152 ERR_BAIL(state->info);
1153 if (rec_state->mode == M_GET)
1154 get_terminal(top, lens, state);
1156 parse_terminal(top, lens, state);
1157 child = ast_append(rec_state, lens, start, end);
1158 ERR_NOMEM(child == NULL, state->info);
1160 RESTORE_REGS(state);
1163 static bool rec_gen_span(struct rec_state *rec_state) {
1164 return ((rec_state->mode == M_GET) && (rec_state->state->enable_span));
1167 static void visit_enter(struct lens *lens,
1168 ATTRIBUTE_UNUSED size_t start,
1169 ATTRIBUTE_UNUSED size_t end,
1171 struct rec_state *rec_state = data;
1172 struct state *state = rec_state->state;
1175 if (state->error != NULL)
1178 if (debugging("cf.get"))
1179 dbg_visit(lens, '{', start, end, rec_state->fused, rec_state->lvl);
1180 rec_state->lvl += 1;
1181 if (lens->tag == L_SUBTREE) {
1182 /* Same for parse and get */
1183 /* Use this frame to preserve the current state before we process
1184 the contents of the subtree, i.e., lens->child */
1185 struct frame *f = push_frame(rec_state, lens);
1186 ERR_BAIL(state->info);
1187 f->key = state->key;
1188 f->value = state->value;
1190 state->value = NULL;
1191 if (rec_gen_span(rec_state)) {
1192 f->span = state->span;
1193 state->span = make_span(state->info);
1194 ERR_NOMEM(state->span == NULL, state->info);
1196 } else if (lens->tag == L_MAYBE) {
1197 /* Push a frame as a marker so we can tell whether lens->child
1198 actually had a match or not */
1199 push_frame(rec_state, lens);
1200 ERR_BAIL(state->info);
1202 child = ast_append(rec_state, lens, start, end);
1204 rec_state->ast = child;
1209 /* Combine n frames from the stack into one new frame for M_GET */
1210 static void get_combine(struct rec_state *rec_state,
1211 struct lens *lens, uint n) {
1212 struct tree *tree = NULL, *tail = NULL;
1213 char *key = NULL, *value = NULL;
1214 struct frame *top = NULL;
1216 for (int i=0; i < n; i++) {
1217 top = pop_frame(rec_state);
1218 ERR_BAIL(lens->info);
1219 list_tail_cons(tree, tail, top->tree);
1220 /* top->tree might have more than one node, update tail */
1222 while (tail->next != NULL) tail = tail->next;
1224 if (top->key != NULL) {
1225 ensure(key == NULL, rec_state->state->info);
1228 if (top->value != NULL) {
1229 ensure(value == NULL, rec_state->state->info);
1233 top = push_frame(rec_state, lens);
1234 ERR_BAIL(lens->info);
1242 /* Combine n frames from the stack into one new frame for M_PUT */
1243 static void parse_combine(struct rec_state *rec_state,
1244 struct lens *lens, uint n) {
1245 struct skel *skel = make_skel(lens), *tail = NULL;
1246 struct dict *dict = NULL;
1248 struct frame *top = NULL;
1250 for (int i=0; i < n; i++) {
1251 top = pop_frame(rec_state);
1252 ERR_BAIL(lens->info);
1253 list_tail_cons(skel->skels, tail, top->skel);
1254 /* top->skel might have more than one node, update skel */
1256 while (tail->next != NULL) tail = tail->next;
1257 dict_append(&dict, top->dict);
1258 if (top->key != NULL) {
1259 ensure(key == NULL, rec_state->state->info);
1263 top = push_frame(rec_state, lens);
1264 ERR_BAIL(lens->info);
1265 top->skel = move(skel);
1266 top->dict = move(dict);
1274 static void visit_exit_put_subtree(struct lens *lens,
1275 struct rec_state *rec_state,
1276 struct frame *top) {
1277 struct state *state = rec_state->state;
1278 struct skel *skel = NULL;
1279 struct dict *dict = NULL;
1281 skel = make_skel(lens);
1282 ERR_NOMEM(skel == NULL, lens->info);
1283 dict = make_dict(top->key, top->skel, top->dict);
1284 ERR_NOMEM(dict == NULL, lens->info);
1286 top = pop_frame(rec_state);
1287 ERR_BAIL(state->info);
1288 ensure(lens == top->lens, state->info);
1289 state->key = top->key;
1290 top = push_frame(rec_state, lens);
1291 ERR_BAIL(state->info);
1292 top->skel = move(skel);
1293 top->dict = move(dict);
1299 static void visit_exit(struct lens *lens,
1300 ATTRIBUTE_UNUSED size_t start,
1301 ATTRIBUTE_UNUSED size_t end,
1303 struct rec_state *rec_state = data;
1304 struct state *state = rec_state->state;
1305 struct tree *tree = NULL;
1307 if (state->error != NULL)
1310 rec_state->lvl -= 1;
1311 if (debugging("cf.get"))
1312 dbg_visit(lens, '}', start, end, rec_state->fused, rec_state->lvl);
1314 ERR_BAIL(lens->info);
1316 if (lens->tag == L_SUBTREE) {
1317 /* Get the result of parsing lens->child */
1318 struct frame *top = pop_frame(rec_state);
1319 ERR_BAIL(state->info);
1320 if (rec_state->mode == M_GET) {
1321 tree = make_tree(top->key, top->value, NULL, top->tree);
1322 ERR_NOMEM(tree == NULL, lens->info);
1323 tree->span = state->span;
1324 /* Restore the parse state from before entering this subtree */
1325 top = pop_frame(rec_state);
1326 ERR_BAIL(state->info);
1327 ensure(lens == top->lens, state->info);
1328 state->key = top->key;
1329 state->value = top->value;
1330 state->span = top->span;
1331 /* Push the result of parsing this subtree */
1332 top = push_frame(rec_state, lens);
1333 ERR_BAIL(state->info);
1334 top->tree = move(tree);
1336 visit_exit_put_subtree(lens, rec_state, top);
1338 } else if (lens->tag == L_CONCAT) {
1339 ensure(rec_state->fused >= lens->nchildren, state->info);
1340 for (int i = 0; i < lens->nchildren; i++) {
1341 struct frame *fr = nth_frame(rec_state, i);
1342 ERR_BAIL(state->info);
1343 BUG_ON(lens->children[i] != fr->lens,
1345 "Unexpected lens in concat %zd..%zd\n Expected: %s\n Actual: %s",
1347 format_lens(lens->children[i]),
1348 format_lens(fr->lens));
1350 rec_state->combine(rec_state, lens, lens->nchildren);
1351 } else if (lens->tag == L_STAR) {
1353 while (n < rec_state->fused &&
1354 nth_frame(rec_state, n)->lens == lens->child)
1356 ERR_BAIL(state->info);
1357 rec_state->combine(rec_state, lens, n);
1358 } else if (lens->tag == L_MAYBE) {
1360 if (rec_state->fused > 0
1361 && top_frame(rec_state)->lens == lens->child) {
1364 ERR_BAIL(state->info);
1365 /* If n = 2, the top of the stack is our child's result, and the
1366 frame underneath it is the marker frame we pushed during
1367 visit_enter. Combine these two frames into one, which represents
1368 the result of parsing the whole L_MAYBE. */
1369 rec_state->combine(rec_state, lens, n);
1370 } else if (lens->tag == L_SQUARE) {
1371 if (rec_state->mode == M_GET) {
1372 struct ast *square, *concat, *right, *left;
1376 square = rec_state->ast;
1377 concat = child_first(square);
1378 right = child_first(concat);
1379 left = child_last(concat);
1380 lsqr = token_range(state->text, left->start, left->end);
1381 rsqr = token_range(state->text, right->start, right->end);
1382 ret = square_match(lens, lsqr, rsqr);
1384 get_error(state, lens, "%s \"%s\" %s \"%s\"",
1385 "Parse error: mismatched in square lens, expecting", lsqr,
1393 rec_state->combine(rec_state, lens, 1);
1395 /* Turn the top frame from having the result of one of our children
1396 to being our result */
1397 top_frame(rec_state)->lens = lens;
1398 ERR_BAIL(state->info);
1406 static void visit_error(struct lens *lens, void *data, size_t pos,
1407 const char *format, ...) {
1408 struct rec_state *rec_state = data;
1411 va_start(ap, format);
1412 vget_error(rec_state->state, lens, format, ap);
1414 rec_state->state->error->pos = rec_state->start + pos;
1417 static struct frame *rec_process(enum mode_t mode, struct lens *lens,
1418 struct state *state) {
1419 uint end = REG_END(state);
1420 uint start = REG_START(state);
1423 struct jmt_visitor visitor;
1424 struct rec_state rec_state;
1426 struct frame *f = NULL;
1428 MEMZERO(&rec_state, 1);
1429 MEMZERO(&visitor, 1);
1432 if (lens->jmt == NULL) {
1433 lens->jmt = jmt_build(lens);
1434 ERR_BAIL(lens->info);
1437 rec_state.mode = mode;
1438 rec_state.state = state;
1439 rec_state.fused = 0;
1441 rec_state.start = start;
1442 rec_state.ast = make_ast(lens);
1443 rec_state.combine = (mode == M_GET) ? get_combine : parse_combine;
1444 ERR_NOMEM(rec_state.ast == NULL, state->info);
1446 visitor.parse = jmt_parse(lens->jmt, state->text + start, end - start);
1447 ERR_BAIL(lens->info);
1448 visitor.terminal = visit_terminal;
1449 visitor.enter = visit_enter;
1450 visitor.exit = visit_exit;
1451 visitor.error = visit_error;
1452 visitor.data = &rec_state;
1453 r = jmt_visit(&visitor, &len);
1454 ERR_BAIL(lens->info);
1456 get_error(state, lens, "Syntax error");
1457 state->error->pos = start + len;
1459 if (rec_state.fused == 0) {
1460 get_error(state, lens,
1461 "Parse did not leave a result on the stack");
1463 } else if (rec_state.fused > 1) {
1464 get_error(state, lens,
1465 "Parse left additional garbage on the stack");
1469 rec_state.ast = ast_root(rec_state.ast);
1470 ensure(rec_state.ast->parent == NULL, state->info);
1472 if (debugging("cf.get.ast"))
1473 print_ast(ast_root(rec_state.ast), 0);
1474 RESTORE_REGS(state);
1475 jmt_free_parse(visitor.parse);
1476 free_ast(ast_root(rec_state.ast));
1477 return rec_state.frames;
1480 for(i = 0; i < rec_state.fused; i++) {
1481 f = nth_frame(&rec_state, i);
1484 if (mode == M_GET) {
1487 } else if (mode == M_PARSE) {
1492 FREE(rec_state.frames);
1496 static struct tree *get_rec(struct lens *lens, struct state *state) {
1498 struct tree *tree = NULL;
1500 fr = rec_process(M_GET, lens, state);
1503 state->key = fr->key;
1504 state->value = fr->value;
1510 static struct skel *parse_rec(struct lens *lens, struct state *state,
1511 struct dict **dict) {
1512 struct skel *skel = NULL;
1515 fr = rec_process(M_PARSE, lens, state);
1519 state->key = fr->key;
1525 static struct tree *get_lens(struct lens *lens, struct state *state) {
1526 struct tree *tree = NULL;
1530 tree = get_del(lens, state);
1533 tree = get_store(lens, state);
1536 tree = get_value(lens, state);
1539 tree = get_key(lens, state);
1542 tree = get_label(lens, state);
1545 tree = get_seq(lens, state);
1548 tree = get_counter(lens, state);
1551 tree = get_concat(lens, state);
1554 tree = get_union(lens, state);
1557 tree = get_subtree(lens, state);
1560 tree = get_quant_star(lens, state);
1563 tree = get_quant_maybe(lens, state);
1566 tree = get_square(lens, state);
1569 BUG_ON(true, state->info, "illegal lens tag %d", lens->tag);
1576 /* Initialize registers. Return 0 if the lens matches the entire text, 1 if
1577 * it does not and -1 on error.
1579 static int init_regs(struct state *state, struct lens *lens, uint size) {
1582 if (lens->tag != L_STAR && ! lens->recursive) {
1583 r = match(state, lens, lens->ctype, size, 0);
1585 get_error(state, lens, "Input string does not match at all");
1590 /* Special case the very common situation that the lens is (l)*
1591 * We can avoid matching the entire text in that case - that
1592 * match can be very expensive
1594 if (ALLOC(state->regs) < 0)
1596 state->regs->num_regs = 1;
1597 if (ALLOC(state->regs->start) < 0 || ALLOC(state->regs->end) < 0)
1599 state->regs->start[0] = 0;
1600 state->regs->end[0] = size;
1604 struct tree *lns_get(struct info *info, struct lens *lens, const char *text,
1605 int enable_span, struct lns_error **err) {
1607 struct tree *tree = NULL;
1608 uint size = strlen(text);
1612 r = ALLOC(state.info);
1613 ERR_NOMEM(r < 0, info);
1615 *state.info = *info;
1616 state.info->ref = UINT_MAX;
1620 state.enable_span = enable_span;
1622 /* We are probably being overly cautious here: if the lens can't process
1623 * all of TEXT, we should really fail somewhere in one of the sublenses.
1624 * But to be safe, we check that we can process everything anyway, then
1625 * try to process, hoping we'll get a more specific error, and if that
1626 * fails, we throw our arms in the air and say 'something went wrong'
1628 partial = init_regs(&state, lens, size);
1630 if (lens->recursive)
1631 tree = get_rec(lens, &state);
1633 tree = get_lens(lens, &state);
1636 free_seqs(state.seqs);
1637 if (state.key != NULL) {
1638 get_error(&state, lens, "get left unused key %s", state.key);
1641 if (state.value != NULL) {
1642 get_error(&state, lens, "get left unused value %s", state.value);
1645 if (partial && state.error == NULL) {
1646 get_error(&state, lens, "Get did not match entire input");
1656 if (state.error != NULL) {
1660 free_lns_error(state.error);
1665 static struct skel *parse_lens(struct lens *lens, struct state *state,
1666 struct dict **dict) {
1667 struct skel *skel = NULL;
1671 skel = parse_del(lens, state);
1674 skel = parse_store(lens, state);
1677 skel = parse_value(lens, state);
1680 skel = parse_key(lens, state);
1683 skel = parse_label(lens, state);
1686 skel = parse_seq(lens, state);
1689 skel = parse_counter(lens, state);
1692 skel = parse_concat(lens, state, dict);
1695 skel = parse_union(lens, state, dict);
1698 skel = parse_subtree(lens, state, dict);
1701 skel = parse_quant_star(lens, state, dict);
1704 skel = parse_quant_maybe(lens, state, dict);
1707 skel = parse_square(lens, state, dict);
1710 BUG_ON(true, state->info, "illegal lens tag %d", lens->tag);
1717 struct skel *lns_parse(struct lens *lens, const char *text, struct dict **dict,
1718 struct lns_error **err) {
1720 struct skel *skel = NULL;
1721 uint size = strlen(text);
1725 r = ALLOC(state.info);
1726 ERR_NOMEM(r< 0, lens->info);
1727 state.info->ref = UINT_MAX;
1728 state.info->error = lens->info->error;
1733 partial = init_regs(&state, lens, size);
1736 if (lens->recursive)
1737 skel = parse_rec(lens, &state, dict);
1739 skel = parse_lens(lens, &state, dict);
1741 free_seqs(state.seqs);
1742 if (state.error != NULL) {
1748 if (state.key != NULL) {
1749 get_error(&state, lens, "parse left unused key %s", state.key);
1752 if (state.value != NULL) {
1753 get_error(&state, lens, "parse left unused value %s", state.value);
1757 // This should never happen during lns_parse
1758 get_error(&state, lens, "parse can not process entire input");
1767 free_lns_error(state.error);
1774 * indent-tabs-mode: nil