2 * parser.c: parse a configuration file according to a grammar
4 * Copyright (C) 2007-2016 David Lutterkort
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * Author: David Lutterkort <dlutter@redhat.com>
36 /* Our favorite error message */
37 static const char *const short_iteration =
38 "Iterated lens matched less than it should";
52 char *value; /* GET_STORE leaves a value here */
53 struct lns_error *error;
54 /* We use the registers from a regular expression match to keep track
55 * of the substring we are currently looking at. REGS are the registers
56 * from the last regexp match; NREG is the number of the register
57 * in REGS that describes the substring we are currently looking at.
59 * We adjust NREG as we traverse lenses either because we know the
60 * grouping structure of lenses (for L_STAR, the child lens is always
61 * in NREG + 1) or by looking at the number of groups in a sublens (as
62 * we move from one child of L_CONCAT to the next, we need to add 1 +
63 * number of groups of that child to NREG) How NREG is adjusted is
64 * closely related to how the REGEXP_* routines build up bigger regexps
67 struct re_registers *regs;
71 /* Used by recursive lenses to stack intermediate results
72 Frames are used for a few different purposes:
74 (1) to save results from individual lenses that are later gathered and
75 combined by combinators like L_STAR and L_CONCAT
76 (2) to preserve parse state when descending into a L_SUBTREE
77 (3) as a marker to determine if a L_MAYBE had a match or not
88 struct { /* M_PARSE */
95 /* Used by recursive lenses in get_rec and parse_rec */
96 enum mode_t { M_GET, M_PARSE };
98 /* Abstract Syntax Tree for recursive parse */
101 struct ast **children;
114 struct frame *frames;
116 uint lvl; /* Debug only */
118 /* Will either be get_combine or parse_combine, depending on MODE, for
119 the duration of the whole recursive parse */
120 void (*combine)(struct rec_state *, struct lens *, uint);
123 #define REG_START(state) ((state)->regs->start[(state)->nreg])
124 #define REG_END(state) ((state)->regs->end[(state)->nreg])
125 #define REG_SIZE(state) (REG_END(state) - REG_START(state))
126 #define REG_POS(state) ((state)->text + REG_START(state))
127 #define REG_VALID(state) ((state)->regs != NULL && \
128 (state)->nreg < (state)->regs->num_regs)
129 #define REG_MATCHED(state) (REG_VALID(state) \
130 && (state)->regs->start[(state)->nreg] >= 0)
132 /* The macros SAVE_REGS and RESTORE_REGS are used to remember and restore
133 * where our caller was in processing their regexp match before we do
134 * matching ourselves (and would therefore clobber the caller's state)
136 * These two macros are admittedly horrible in that they declare local
137 * variables called OLD_REGS and OLD_NREG. The alternative to avoiding that
138 * would be to manually manage a stack of regs/nreg in STATE.
140 #define SAVE_REGS(state) \
141 struct re_registers *old_regs = state->regs; \
142 uint old_nreg = state->nreg; \
143 state->regs = NULL; \
146 #define RESTORE_REGS(state) \
148 state->regs = old_regs; \
149 state->nreg = old_nreg;
154 static struct ast *make_ast(struct lens *lens) {
155 struct ast *ast = NULL;
161 if (ALLOC_N(ast->children, ast->capacity) < 0) {
168 /* recursively free the node and all it's descendants */
169 static void free_ast(struct ast *ast) {
173 for (i = 0; i < ast->nchildren; i++) {
174 free_ast(ast->children[i]);
176 if (ast->children != NULL)
181 static struct ast *ast_root(struct ast *ast) {
182 struct ast *root = ast;
183 while(root != NULL && root->parent != NULL)
188 static void ast_set(struct ast *ast, uint start, uint end) {
195 /* append a child to the parent ast */
196 static struct ast *ast_append(struct rec_state *rec_state, struct lens *lens,
197 uint start, uint end) {
199 struct ast *child, *parent;
200 struct state *state = rec_state->state;
202 parent = rec_state->ast;
206 child = make_ast(lens);
207 ERR_NOMEM(child == NULL, state->info);
209 ast_set(child, start, end);
210 if (parent->nchildren >= parent->capacity) {
211 ret = REALLOC_N(parent->children, parent->capacity * 2);
212 ERR_NOMEM(ret < 0, state->info);
213 parent->capacity = parent->capacity * 2;
215 parent->children[parent->nchildren++] = child;
216 child->parent = parent;
224 /* pop the ast from one level, fail the parent is NULL */
225 static void ast_pop(struct rec_state *rec_state) {
226 ensure(rec_state->ast != NULL && rec_state->ast->parent != NULL, rec_state->state->info);
227 rec_state->ast = rec_state->ast->parent;
232 static void print_ast(const struct ast *ast, int lvl) {
237 for (i = 0; i < lvl; i++) fputs(" ", stdout);
238 lns = format_lens(ast->lens);
239 printf("%d..%d %s\n", ast->start, ast->end, lns);
241 for (i = 0; i < ast->nchildren; i++) {
242 print_ast(ast->children[i], lvl + 1);
246 static void get_error(struct state *state, struct lens *lens,
247 const char *format, ...)
248 ATTRIBUTE_FORMAT(printf, 3, 4);
250 void free_lns_error(struct lns_error *err) {
255 unref(err->lens, lens);
259 static void vget_error(struct state *state, struct lens *lens,
260 const char *format, va_list ap) {
263 if (state->error != NULL)
265 if (ALLOC(state->error) < 0)
267 state->error->lens = ref(lens);
268 if (REG_MATCHED(state))
269 state->error->pos = REG_END(state);
271 state->error->pos = 0;
272 r = vasprintf(&state->error->message, format, ap);
274 state->error->message = NULL;
277 static void get_error(struct state *state, struct lens *lens,
278 const char *format, ...)
282 va_start(ap, format);
283 vget_error(state, lens, format, ap);
287 static struct skel *make_skel(struct lens *lens) {
289 enum lens_tag tag = lens->tag;
295 void free_skel(struct skel *skel) {
298 if (skel->tag == L_CONCAT || skel->tag == L_STAR || skel->tag == L_MAYBE ||
299 skel->tag == L_SQUARE) {
300 while (skel->skels != NULL) {
301 struct skel *del = skel->skels;
302 skel->skels = del->next;
305 } else if (skel->tag == L_DEL) {
311 static void print_skel(struct skel *skel);
312 static void print_skel_list(struct skel *skels, const char *beg,
313 const char *sep, const char *end) {
315 list_for_each(s, skels) {
323 static void print_skel(struct skel *skel) {
326 if (skel->text == NULL) {
330 print_chars(stdout, skel->text, -1);
335 print_skel_list(skel->skels, "", " . ", "");
338 print_skel_list(skel->skels, "(", " ", ")*");
341 print_skel_list(skel->skels, "(", " ", ")?");
344 print_skel_list(skel->skels, "[", " ", "]");
352 // DICT_DUMP is only used for debugging
354 static void print_dict(struct dict *dict, int indent) {
355 list_for_each(d, dict) {
356 printf("%*s%s:\n", indent, "", d->key);
357 list_for_each(e, d->entry) {
358 printf("%*s", indent+2, "");
361 print_dict(e->dict, indent+2);
367 static void get_expected_error(struct state *state, struct lens *l) {
368 /* Size of the excerpt of the input text we'll show */
369 static const int wordlen = 10;
370 char word[wordlen+1];
373 if (REG_MATCHED(state))
374 strncpy(word, REG_POS(state), wordlen);
376 strncpy(word, state->text, wordlen);
377 word[wordlen] = '\0';
378 for (p = word; *p != '\0' && *p != '\n'; p++);
381 pat = escape(l->ctype->pattern->str, -1, NULL);
382 get_error(state, l, "expected %s at '%s'", pat, word);
387 * Splitting of the input text
390 static char *token(struct state *state) {
391 ensure0(REG_MATCHED(state), state->info);
392 return strndup(REG_POS(state), REG_SIZE(state));
395 static char *token_range(const char *text, uint start, uint end) {
396 return strndup(text + start, end - start);
399 static void regexp_match_error(struct state *state, struct lens *lens,
400 int count, struct regexp *r) {
402 char *pat = regexp_escape(r);
404 if (state->regs != NULL)
405 text = strndup(REG_POS(state), REG_SIZE(state));
407 text = strdup("(unknown)");
410 get_error(state, lens, "Failed to match /%s/ with %s", pat, text);
411 } else if (count == -2) {
412 get_error(state, lens, "Internal error matching /%s/ with %s",
414 } else if (count == -3) {
415 /* Should have been caught by the typechecker */
416 get_error(state, lens, "Syntax error in regexp /%s/", pat);
422 static void no_match_error(struct state *state, struct lens *lens) {
423 ensure(lens->tag == L_KEY || lens->tag == L_DEL
424 || lens->tag == L_STORE, state->info);
425 char *pat = regexp_escape(lens->ctype);
426 const char *lname = "(lname)";
427 if (lens->tag == L_KEY)
429 else if (lens->tag == L_DEL)
431 else if (lens->tag == L_STORE)
433 get_error(state, lens, "no match for %s /%s/", lname, pat);
439 /* Modifies STATE->REGS and STATE->NREG. The caller must save these
440 * if they are still needed
442 * Return the number of characters matched
444 static int match(struct state *state, struct lens *lens,
445 struct regexp *re, uint size, uint start) {
446 struct re_registers *regs;
452 count = regexp_match(re, state->text, size, start, regs);
454 regexp_match_error(state, lens, count, re);
463 static void free_regs(struct state *state) {
464 if (state->regs != NULL) {
465 free(state->regs->start);
466 free(state->regs->end);
471 static struct tree *get_lens(struct lens *lens, struct state *state);
472 static struct skel *parse_lens(struct lens *lens, struct state *state,
475 static void free_seqs(struct seq *seqs) {
476 /* Do not free seq->name; it's not owned by the seq, but by some lens */
480 static struct seq *find_seq(const char *name, struct state *state) {
481 ensure0(name != NULL, state->info);
484 for (seq=state->seqs;
485 seq != NULL && STRNEQ(seq->name, name);
492 list_append(state->seqs, seq);
498 static struct tree *get_seq(struct lens *lens, struct state *state) {
499 ensure0(lens->tag == L_SEQ, state->info);
500 struct seq *seq = find_seq(lens->string->str, state);
503 r = asprintf((char **) &(state->key), "%d", seq->value);
504 ERR_NOMEM(r < 0, state->info);
511 static struct skel *parse_seq(struct lens *lens, struct state *state) {
512 get_seq(lens, state);
513 return make_skel(lens);
516 static struct tree *get_counter(struct lens *lens, struct state *state) {
517 ensure0(lens->tag == L_COUNTER, state->info);
518 struct seq *seq = find_seq(lens->string->str, state);
523 static struct skel *parse_counter(struct lens *lens, struct state *state) {
524 get_counter(lens, state);
525 return make_skel(lens);
528 static struct tree *get_del(struct lens *lens, struct state *state) {
529 ensure0(lens->tag == L_DEL, state->info);
530 if (! REG_MATCHED(state)) {
531 char *pat = regexp_escape(lens->ctype);
532 get_error(state, lens, "no match for del /%s/", pat);
535 update_span(state->span, REG_START(state), REG_END(state));
539 static struct skel *parse_del(struct lens *lens, struct state *state) {
540 ensure0(lens->tag == L_DEL, state->info);
541 struct skel *skel = NULL;
543 skel = make_skel(lens);
544 if (! REG_MATCHED(state))
545 no_match_error(state, lens);
547 skel->text = token(state);
551 static struct tree *get_store(struct lens *lens, struct state *state) {
552 ensure0(lens->tag == L_STORE, state->info);
553 ensure0(state->value == NULL, state->info);
555 struct tree *tree = NULL;
557 if (state->value != NULL)
558 get_error(state, lens, "More than one store in a subtree");
559 else if (! REG_MATCHED(state))
560 no_match_error(state, lens);
562 state->value = token(state);
564 state->span->value_start = REG_START(state);
565 state->span->value_end = REG_END(state);
566 update_span(state->span, REG_START(state), REG_END(state));
572 static struct skel *parse_store(struct lens *lens,
573 ATTRIBUTE_UNUSED struct state *state) {
574 ensure0(lens->tag == L_STORE, state->info);
575 return make_skel(lens);
578 static struct tree *get_value(struct lens *lens, struct state *state) {
579 ensure0(lens->tag == L_VALUE, state->info);
580 state->value = strdup(lens->string->str);
584 static struct skel *parse_value(struct lens *lens,
585 ATTRIBUTE_UNUSED struct state *state) {
586 ensure0(lens->tag == L_VALUE, state->info);
587 return make_skel(lens);
590 static struct tree *get_key(struct lens *lens, struct state *state) {
591 ensure0(lens->tag == L_KEY, state->info);
592 if (! REG_MATCHED(state))
593 no_match_error(state, lens);
595 state->key = token(state);
597 state->span->label_start = REG_START(state);
598 state->span->label_end = REG_END(state);
599 update_span(state->span, REG_START(state), REG_END(state));
605 static struct skel *parse_key(struct lens *lens, struct state *state) {
606 get_key(lens, state);
607 return make_skel(lens);
610 static struct tree *get_label(struct lens *lens, struct state *state) {
611 ensure0(lens->tag == L_LABEL, state->info);
612 state->key = strdup(lens->string->str);
616 static struct skel *parse_label(struct lens *lens, struct state *state) {
617 get_label(lens, state);
618 return make_skel(lens);
621 static struct tree *get_union(struct lens *lens, struct state *state) {
622 ensure0(lens->tag == L_UNION, state->info);
624 struct tree *tree = NULL;
626 uint old_nreg = state->nreg;
629 for (int i=0; i < lens->nchildren; i++) {
630 if (REG_MATCHED(state)) {
631 tree = get_lens(lens->children[i], state);
635 state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
637 state->nreg = old_nreg;
639 get_expected_error(state, lens);
643 static struct skel *parse_union(struct lens *lens, struct state *state,
644 struct dict **dict) {
645 ensure0(lens->tag == L_UNION, state->info);
647 struct skel *skel = NULL;
649 uint old_nreg = state->nreg;
652 for (int i=0; i < lens->nchildren; i++) {
653 struct lens *l = lens->children[i];
654 if (REG_MATCHED(state)) {
655 skel = parse_lens(l, state, dict);
659 state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
661 state->nreg = old_nreg;
663 get_expected_error(state, lens);
668 static struct tree *get_concat(struct lens *lens, struct state *state) {
669 ensure0(lens->tag == L_CONCAT, state->info);
671 struct tree *tree = NULL;
672 uint old_nreg = state->nreg;
675 for (int i=0; i < lens->nchildren; i++) {
676 struct tree *t = NULL;
677 if (! REG_VALID(state)) {
678 get_error(state, lens->children[i],
679 "Not enough components in concat");
681 state->nreg = old_nreg;
685 t = get_lens(lens->children[i], state);
686 list_append(tree, t);
687 state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
689 state->nreg = old_nreg;
694 static struct skel *parse_concat(struct lens *lens, struct state *state,
695 struct dict **dict) {
696 ensure0(lens->tag == L_CONCAT, state->info);
697 struct skel *skel = make_skel(lens);
698 uint old_nreg = state->nreg;
701 for (int i=0; i < lens->nchildren; i++) {
702 struct skel *sk = NULL;
703 struct dict *di = NULL;
704 if (! REG_VALID(state)) {
705 get_error(state, lens->children[i],
706 "Not enough components in concat");
711 sk = parse_lens(lens->children[i], state, &di);
712 list_append(skel->skels, sk);
713 dict_append(dict, di);
714 state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
716 state->nreg = old_nreg;
721 /* Given a lens that does not match at the current position, try to find
722 * the left-most child LAST that does match and the lens next to it NEXT
723 * that does not match. Return the length of the match.
725 * If no such child exists, return 0
727 static int try_match(struct lens *lens, struct state *state,
728 uint start, uint end,
729 struct lens **last, struct lens **next) {
743 result = regexp_match(lens->ctype, state->text, end, start, NULL);
748 for (int i=0; i < lens->nchildren; i++) {
749 struct lens *child = lens->children[i];
750 struct lens *next_child =
751 (i < lens->nchildren - 1) ? lens->children[i+1] : NULL;
753 r = regexp_match(child->ctype, state->text, end, start, NULL);
758 } else if (result > 0) {
763 result = try_match(child, state, start, end, last, next);
764 if (result > 0 && *next == NULL)
772 for (int i=0; i < lens->nchildren; i++) {
773 struct lens *child = lens->children[i];
774 result = try_match(child, state, start, end, last, next);
784 return try_match(lens->child, state, start, end, last, next);
787 BUG_ON(true, state->info, "illegal lens tag %d", lens->tag);
794 static void short_iteration_error(struct lens *lens, struct state *state,
795 uint start, uint end) {
798 get_error(state, lens, "%s", short_iteration);
800 match_count = try_match(lens->child, state, start, end,
801 &state->error->last, &state->error->next);
802 state->error->pos = start + match_count;
805 static struct tree *get_quant_star(struct lens *lens, struct state *state) {
806 ensure0(lens->tag == L_STAR, state->info);
807 struct lens *child = lens->child;
808 struct tree *tree = NULL, *tail = NULL;
809 uint end = REG_END(state);
810 uint start = REG_START(state);
811 uint size = end - start;
814 while (size > 0 && match(state, child, child->ctype, end, start) > 0) {
815 struct tree *t = NULL;
817 t = get_lens(lens->child, state);
818 list_tail_cons(tree, tail, t);
820 start += REG_SIZE(state);
821 size -= REG_SIZE(state);
826 short_iteration_error(lens, state, start, end);
831 static struct skel *parse_quant_star(struct lens *lens, struct state *state,
832 struct dict **dict) {
833 ensure0(lens->tag == L_STAR, state->info);
834 struct lens *child = lens->child;
835 struct skel *skel = make_skel(lens), *tail = NULL;
836 uint end = REG_END(state);
837 uint start = REG_START(state);
838 uint size = end - start;
842 while (size > 0 && match(state, child, child->ctype, end, start) > 0) {
844 struct dict *di = NULL;
846 sk = parse_lens(lens->child, state, &di);
847 list_tail_cons(skel->skels, tail, sk);
848 dict_append(dict, di);
850 start += REG_SIZE(state);
851 size -= REG_SIZE(state);
856 get_error(state, lens, "%s", short_iteration);
861 static struct tree *get_quant_maybe(struct lens *lens, struct state *state) {
862 ensure0(lens->tag == L_MAYBE, state->info);
863 struct tree *tree = NULL;
865 /* Check that our child matched. For a construct like (r)?, the
866 * matcher will report a zero length match for group 0, even if r
867 * does not match at all
870 if (REG_MATCHED(state)) {
871 tree = get_lens(lens->child, state);
877 static struct skel *parse_quant_maybe(struct lens *lens, struct state *state,
878 struct dict **dict) {
879 ensure0(lens->tag == L_MAYBE, state->info);
881 struct skel *skel = NULL;
884 if (REG_MATCHED(state)) {
885 skel = parse_lens(lens->child, state, dict);
889 skel = make_skel(lens);
893 static struct tree *get_subtree(struct lens *lens, struct state *state) {
894 char *key = state->key;
895 char *value = state->value;
896 struct span *span = move(state->span);
898 struct tree *tree = NULL, *children;
902 if (state->info->flags & AUG_ENABLE_SPAN) {
903 state->span = make_span(state->info);
904 ERR_NOMEM(state->span == NULL, state->info);
907 children = get_lens(lens->child, state);
909 tree = make_tree(state->key, state->value, NULL, children);
910 ERR_NOMEM(tree == NULL, state->info);
911 tree->span = move(state->span);
913 if (tree->span != NULL) {
914 update_span(span, tree->span->span_start, tree->span->span_end);
918 state->value = value;
922 free_span(state->span);
927 static struct skel *parse_subtree(struct lens *lens, struct state *state,
928 struct dict **dict) {
929 char *key = state->key;
931 struct dict *di = NULL;
934 skel = parse_lens(lens->child, state, &di);
935 *dict = make_dict(state->key, skel, di);
937 return make_skel(lens);
940 /* Check if left and right strings matches according to the square lens
943 * Returns 1 if strings matches, 0 otherwise
945 static int square_match(struct lens *lens, char *left, char *right) {
947 struct lens *concat = NULL;
949 // if one of the argument is NULL, returns no match
950 if (left == NULL || right == NULL || lens == NULL)
953 concat = lens->child;
954 /* If either right or left lens is nocase, then ignore case */
955 if (child_first(concat)->ctype->nocase ||
956 child_last(concat)->ctype->nocase) {
957 cmp = STRCASEEQ(left, right);
959 cmp = STREQ(left, right);
965 * This function applies only for non-recursive lens, handling of recursive
966 * square is done in visit_exit().
968 static struct tree *get_square(struct lens *lens, struct state *state) {
969 ensure0(lens->tag == L_SQUARE, state->info);
971 struct lens *concat = lens->child;
972 struct tree *tree = NULL;
973 uint end = REG_END(state);
974 uint start = REG_START(state);
975 char *rsqr = NULL, *lsqr = NULL;
979 r = match(state, lens->child, lens->child->ctype, end, start);
980 ERR_NOMEM(r < 0, state->info);
982 tree = get_lens(lens->child, state);
984 /* retrieve left component */
986 start = REG_START(state);
987 end = REG_END(state);
988 lsqr = token_range(state->text, start, end);
990 /* retrieve right component */
991 /* compute nreg for the last children */
992 for (int i = 0; i < concat->nchildren - 1; i++)
993 state->nreg += 1 + regexp_nsub(concat->children[i]->ctype);
995 start = REG_START(state);
996 end = REG_END(state);
997 rsqr = token_range(state->text, start, end);
999 if (!square_match(lens, lsqr, rsqr)) {
1000 get_error(state, lens, "%s \"%s\" %s \"%s\"",
1001 "Parse error: mismatched in square lens, expecting", lsqr,
1007 RESTORE_REGS(state);
1018 static struct skel *parse_square(struct lens *lens, struct state *state,
1019 struct dict **dict) {
1020 ensure0(lens->tag == L_SQUARE, state->info);
1021 uint end = REG_END(state);
1022 uint start = REG_START(state);
1023 struct skel *skel = NULL, *sk = NULL;
1027 r = match(state, lens->child, lens->child->ctype, end, start);
1028 ERR_NOMEM(r < 0, state->info);
1030 skel = parse_lens(lens->child, state, dict);
1033 sk = make_skel(lens);
1037 RESTORE_REGS(state);
1042 * Helpers for recursive lenses
1046 static void print_frames(struct rec_state *state) {
1047 for (int j = state->fused - 1; j >=0; j--) {
1048 struct frame *f = state->frames + j;
1049 for (int i=0; i < state->lvl; i++) fputc(' ', stderr);
1050 fprintf(stderr, "%2d %s %s", j, f->key, f->value);
1051 if (f->tree == NULL) {
1052 fprintf(stderr, " - ");
1054 fprintf(stderr, " { %s = %s } ", f->tree->label, f->tree->value);
1056 char *s = format_lens(f->lens);
1058 fprintf(stderr, "%s\n", s);
1065 static struct frame *top_frame(struct rec_state *state) {
1066 ensure0(state->fsize > 0, state->state->info);
1067 return state->frames + state->fused - 1;
1070 /* The nth frame from the top of the stack, where 0th frame is the top */
1072 static struct frame *nth_frame(struct rec_state *state, uint n) {
1073 ensure0(state->fsize > n, state->state->info);
1074 return state->frames + state->fused - (n+1);
1077 static struct frame *push_frame(struct rec_state *state, struct lens *lens) {
1080 if (state->fused >= state->fsize) {
1081 uint expand = state->fsize;
1084 r = REALLOC_N(state->frames, state->fsize + expand);
1085 ERR_NOMEM(r < 0, state->state->info);
1086 state->fsize += expand;
1091 struct frame *top = top_frame(state);
1099 static struct frame *pop_frame(struct rec_state *state) {
1100 ensure0(state->fused > 0, state->state->info);
1102 struct frame *result = top_frame(state);
1107 static void dbg_visit(struct lens *lens, char action, size_t start, size_t end,
1108 int fused, int lvl) {
1110 for (int i=0; i < lvl; i++)
1112 lns = format_lens(lens);
1113 fprintf(stderr, "%c %zd..%zd %d %s\n", action, start, end,
1118 static void get_terminal(struct frame *top, struct lens *lens,
1119 struct state *state) {
1120 top->tree = get_lens(lens, state);
1121 top->key = state->key;
1122 top->value = state->value;
1124 state->value = NULL;
1127 static void parse_terminal(struct frame *top, struct lens *lens,
1128 struct state *state) {
1130 top->skel = parse_lens(lens, state, &top->dict);
1131 top->key = state->key;
1135 static void visit_terminal(struct lens *lens, size_t start, size_t end,
1137 struct rec_state *rec_state = data;
1138 struct state *state = rec_state->state;
1141 if (state->error != NULL)
1145 if (debugging("cf.get"))
1146 dbg_visit(lens, 'T', start, end, rec_state->fused, rec_state->lvl);
1147 match(state, lens, lens->ctype, end, start);
1148 struct frame *top = push_frame(rec_state, lens);
1149 ERR_BAIL(state->info);
1150 if (rec_state->mode == M_GET)
1151 get_terminal(top, lens, state);
1153 parse_terminal(top, lens, state);
1154 child = ast_append(rec_state, lens, start, end);
1155 ERR_NOMEM(child == NULL, state->info);
1157 RESTORE_REGS(state);
1160 static bool rec_gen_span(struct rec_state *rec_state) {
1161 return ((rec_state->mode == M_GET)
1162 && (rec_state->state->info->flags & AUG_ENABLE_SPAN));
1165 static void visit_enter(struct lens *lens,
1166 ATTRIBUTE_UNUSED size_t start,
1167 ATTRIBUTE_UNUSED size_t end,
1169 struct rec_state *rec_state = data;
1170 struct state *state = rec_state->state;
1173 if (state->error != NULL)
1176 if (debugging("cf.get"))
1177 dbg_visit(lens, '{', start, end, rec_state->fused, rec_state->lvl);
1178 rec_state->lvl += 1;
1179 if (lens->tag == L_SUBTREE) {
1180 /* Same for parse and get */
1181 /* Use this frame to preserve the current state before we process
1182 the contents of the subtree, i.e., lens->child */
1183 struct frame *f = push_frame(rec_state, lens);
1184 ERR_BAIL(state->info);
1185 f->key = state->key;
1186 f->value = state->value;
1188 state->value = NULL;
1189 if (rec_gen_span(rec_state)) {
1190 f->span = state->span;
1191 state->span = make_span(state->info);
1192 ERR_NOMEM(state->span == NULL, state->info);
1194 } else if (lens->tag == L_MAYBE) {
1195 /* Push a frame as a marker so we can tell whether lens->child
1196 actually had a match or not */
1197 push_frame(rec_state, lens);
1198 ERR_BAIL(state->info);
1200 child = ast_append(rec_state, lens, start, end);
1202 rec_state->ast = child;
1207 /* Combine n frames from the stack into one new frame for M_GET */
1208 static void get_combine(struct rec_state *rec_state,
1209 struct lens *lens, uint n) {
1210 struct tree *tree = NULL, *tail = NULL;
1211 char *key = NULL, *value = NULL;
1212 struct frame *top = NULL;
1214 for (int i=0; i < n; i++) {
1215 top = pop_frame(rec_state);
1216 ERR_BAIL(lens->info);
1217 list_tail_cons(tree, tail, top->tree);
1218 /* top->tree might have more than one node, update tail */
1220 while (tail->next != NULL) tail = tail->next;
1222 if (top->key != NULL) {
1223 ensure(key == NULL, rec_state->state->info);
1226 if (top->value != NULL) {
1227 ensure(value == NULL, rec_state->state->info);
1231 top = push_frame(rec_state, lens);
1232 ERR_BAIL(lens->info);
1240 /* Combine n frames from the stack into one new frame for M_PUT */
1241 static void parse_combine(struct rec_state *rec_state,
1242 struct lens *lens, uint n) {
1243 struct skel *skel = make_skel(lens), *tail = NULL;
1244 struct dict *dict = NULL;
1246 struct frame *top = NULL;
1248 for (int i=0; i < n; i++) {
1249 top = pop_frame(rec_state);
1250 ERR_BAIL(lens->info);
1251 list_tail_cons(skel->skels, tail, top->skel);
1252 /* top->skel might have more than one node, update skel */
1254 while (tail->next != NULL) tail = tail->next;
1255 dict_append(&dict, top->dict);
1256 if (top->key != NULL) {
1257 ensure(key == NULL, rec_state->state->info);
1261 top = push_frame(rec_state, lens);
1262 ERR_BAIL(lens->info);
1263 top->skel = move(skel);
1264 top->dict = move(dict);
1272 static void visit_exit_put_subtree(struct lens *lens,
1273 struct rec_state *rec_state,
1274 struct frame *top) {
1275 struct state *state = rec_state->state;
1276 struct skel *skel = NULL;
1277 struct dict *dict = NULL;
1279 skel = make_skel(lens);
1280 ERR_NOMEM(skel == NULL, lens->info);
1281 dict = make_dict(top->key, top->skel, top->dict);
1282 ERR_NOMEM(dict == NULL, lens->info);
1284 top = pop_frame(rec_state);
1285 ERR_BAIL(state->info);
1286 ensure(lens == top->lens, state->info);
1287 state->key = top->key;
1288 top = push_frame(rec_state, lens);
1289 ERR_BAIL(state->info);
1290 top->skel = move(skel);
1291 top->dict = move(dict);
1297 static void visit_exit(struct lens *lens,
1298 ATTRIBUTE_UNUSED size_t start,
1299 ATTRIBUTE_UNUSED size_t end,
1301 struct rec_state *rec_state = data;
1302 struct state *state = rec_state->state;
1303 struct tree *tree = NULL;
1305 if (state->error != NULL)
1308 rec_state->lvl -= 1;
1309 if (debugging("cf.get"))
1310 dbg_visit(lens, '}', start, end, rec_state->fused, rec_state->lvl);
1312 ERR_BAIL(lens->info);
1314 if (lens->tag == L_SUBTREE) {
1315 /* Get the result of parsing lens->child */
1316 struct frame *top = pop_frame(rec_state);
1317 ERR_BAIL(state->info);
1318 if (rec_state->mode == M_GET) {
1319 tree = make_tree(top->key, top->value, NULL, top->tree);
1320 ERR_NOMEM(tree == NULL, lens->info);
1321 tree->span = state->span;
1322 /* Restore the parse state from before entering this subtree */
1323 top = pop_frame(rec_state);
1324 ERR_BAIL(state->info);
1325 ensure(lens == top->lens, state->info);
1326 state->key = top->key;
1327 state->value = top->value;
1328 state->span = top->span;
1329 /* Push the result of parsing this subtree */
1330 top = push_frame(rec_state, lens);
1331 ERR_BAIL(state->info);
1332 top->tree = move(tree);
1334 visit_exit_put_subtree(lens, rec_state, top);
1336 } else if (lens->tag == L_CONCAT) {
1337 ensure(rec_state->fused >= lens->nchildren, state->info);
1338 for (int i = 0; i < lens->nchildren; i++) {
1339 struct frame *fr = nth_frame(rec_state, i);
1340 ERR_BAIL(state->info);
1341 BUG_ON(lens->children[i] != fr->lens,
1343 "Unexpected lens in concat %zd..%zd\n Expected: %s\n Actual: %s",
1345 format_lens(lens->children[i]),
1346 format_lens(fr->lens));
1348 rec_state->combine(rec_state, lens, lens->nchildren);
1349 } else if (lens->tag == L_STAR) {
1351 while (n < rec_state->fused &&
1352 nth_frame(rec_state, n)->lens == lens->child)
1354 ERR_BAIL(state->info);
1355 rec_state->combine(rec_state, lens, n);
1356 } else if (lens->tag == L_MAYBE) {
1358 if (rec_state->fused > 0
1359 && top_frame(rec_state)->lens == lens->child) {
1362 ERR_BAIL(state->info);
1363 /* If n = 2, the top of the stack is our child's result, and the
1364 frame underneath it is the marker frame we pushed during
1365 visit_enter. Combine these two frames into one, which represents
1366 the result of parsing the whole L_MAYBE. */
1367 rec_state->combine(rec_state, lens, n);
1368 } else if (lens->tag == L_SQUARE) {
1369 if (rec_state->mode == M_GET) {
1370 struct ast *square, *concat, *right, *left;
1374 square = rec_state->ast;
1375 concat = child_first(square);
1376 right = child_first(concat);
1377 left = child_last(concat);
1378 lsqr = token_range(state->text, left->start, left->end);
1379 rsqr = token_range(state->text, right->start, right->end);
1380 ret = square_match(lens, lsqr, rsqr);
1382 get_error(state, lens, "%s \"%s\" %s \"%s\"",
1383 "Parse error: mismatched in square lens, expecting", lsqr,
1391 rec_state->combine(rec_state, lens, 1);
1393 /* Turn the top frame from having the result of one of our children
1394 to being our result */
1395 top_frame(rec_state)->lens = lens;
1396 ERR_BAIL(state->info);
1404 static void visit_error(struct lens *lens, void *data, size_t pos,
1405 const char *format, ...) {
1406 struct rec_state *rec_state = data;
1409 va_start(ap, format);
1410 vget_error(rec_state->state, lens, format, ap);
1412 rec_state->state->error->pos = rec_state->start + pos;
1415 static struct frame *rec_process(enum mode_t mode, struct lens *lens,
1416 struct state *state) {
1417 uint end = REG_END(state);
1418 uint start = REG_START(state);
1421 struct jmt_visitor visitor;
1422 struct rec_state rec_state;
1424 struct frame *f = NULL;
1426 MEMZERO(&rec_state, 1);
1427 MEMZERO(&visitor, 1);
1430 if (lens->jmt == NULL) {
1431 lens->jmt = jmt_build(lens);
1432 ERR_BAIL(lens->info);
1435 rec_state.mode = mode;
1436 rec_state.state = state;
1437 rec_state.fused = 0;
1439 rec_state.start = start;
1440 rec_state.ast = make_ast(lens);
1441 rec_state.combine = (mode == M_GET) ? get_combine : parse_combine;
1442 ERR_NOMEM(rec_state.ast == NULL, state->info);
1444 visitor.parse = jmt_parse(lens->jmt, state->text + start, end - start);
1445 ERR_BAIL(lens->info);
1446 visitor.terminal = visit_terminal;
1447 visitor.enter = visit_enter;
1448 visitor.exit = visit_exit;
1449 visitor.error = visit_error;
1450 visitor.data = &rec_state;
1451 r = jmt_visit(&visitor, &len);
1452 ERR_BAIL(lens->info);
1454 get_error(state, lens, "Syntax error");
1455 state->error->pos = start + len;
1457 if (rec_state.fused == 0) {
1458 get_error(state, lens,
1459 "Parse did not leave a result on the stack");
1461 } else if (rec_state.fused > 1) {
1462 get_error(state, lens,
1463 "Parse left additional garbage on the stack");
1467 rec_state.ast = ast_root(rec_state.ast);
1468 ensure(rec_state.ast->parent == NULL, state->info);
1470 if (debugging("cf.get.ast"))
1471 print_ast(ast_root(rec_state.ast), 0);
1472 RESTORE_REGS(state);
1473 jmt_free_parse(visitor.parse);
1474 free_ast(ast_root(rec_state.ast));
1475 return rec_state.frames;
1478 for(i = 0; i < rec_state.fused; i++) {
1479 f = nth_frame(&rec_state, i);
1482 if (mode == M_GET) {
1485 } else if (mode == M_PARSE) {
1490 FREE(rec_state.frames);
1494 static struct tree *get_rec(struct lens *lens, struct state *state) {
1496 struct tree *tree = NULL;
1498 fr = rec_process(M_GET, lens, state);
1501 state->key = fr->key;
1502 state->value = fr->value;
1508 static struct skel *parse_rec(struct lens *lens, struct state *state,
1509 struct dict **dict) {
1510 struct skel *skel = NULL;
1513 fr = rec_process(M_PARSE, lens, state);
1517 state->key = fr->key;
1523 static struct tree *get_lens(struct lens *lens, struct state *state) {
1524 struct tree *tree = NULL;
1528 tree = get_del(lens, state);
1531 tree = get_store(lens, state);
1534 tree = get_value(lens, state);
1537 tree = get_key(lens, state);
1540 tree = get_label(lens, state);
1543 tree = get_seq(lens, state);
1546 tree = get_counter(lens, state);
1549 tree = get_concat(lens, state);
1552 tree = get_union(lens, state);
1555 tree = get_subtree(lens, state);
1558 tree = get_quant_star(lens, state);
1561 tree = get_quant_maybe(lens, state);
1564 tree = get_square(lens, state);
1567 BUG_ON(true, state->info, "illegal lens tag %d", lens->tag);
1574 /* Initialize registers. Return 0 if the lens matches the entire text, 1 if
1575 * it does not and -1 on error.
1577 static int init_regs(struct state *state, struct lens *lens, uint size) {
1580 if (lens->tag != L_STAR && ! lens->recursive) {
1581 r = match(state, lens, lens->ctype, size, 0);
1583 get_error(state, lens, "Input string does not match at all");
1588 /* Special case the very common situation that the lens is (l)*
1589 * We can avoid matching the entire text in that case - that
1590 * match can be very expensive
1592 if (ALLOC(state->regs) < 0)
1594 state->regs->num_regs = 1;
1595 if (ALLOC(state->regs->start) < 0 || ALLOC(state->regs->end) < 0)
1597 state->regs->start[0] = 0;
1598 state->regs->end[0] = size;
1602 struct tree *lns_get(struct info *info, struct lens *lens, const char *text,
1603 struct lns_error **err) {
1605 struct tree *tree = NULL;
1606 uint size = strlen(text);
1610 r = ALLOC(state.info);
1611 ERR_NOMEM(r < 0, info);
1613 *state.info = *info;
1614 state.info->ref = UINT_MAX;
1618 /* We are probably being overly cautious here: if the lens can't process
1619 * all of TEXT, we should really fail somewhere in one of the sublenses.
1620 * But to be safe, we check that we can process everything anyway, then
1621 * try to process, hoping we'll get a more specific error, and if that
1622 * fails, we throw our arms in the air and say 'something went wrong'
1624 partial = init_regs(&state, lens, size);
1626 if (lens->recursive)
1627 tree = get_rec(lens, &state);
1629 tree = get_lens(lens, &state);
1632 free_seqs(state.seqs);
1633 if (state.key != NULL) {
1634 get_error(&state, lens, "get left unused key %s", state.key);
1637 if (state.value != NULL) {
1638 get_error(&state, lens, "get left unused value %s", state.value);
1641 if (partial && state.error == NULL) {
1642 get_error(&state, lens, "Get did not match entire input");
1652 if (state.error != NULL) {
1656 free_lns_error(state.error);
1661 static struct skel *parse_lens(struct lens *lens, struct state *state,
1662 struct dict **dict) {
1663 struct skel *skel = NULL;
1667 skel = parse_del(lens, state);
1670 skel = parse_store(lens, state);
1673 skel = parse_value(lens, state);
1676 skel = parse_key(lens, state);
1679 skel = parse_label(lens, state);
1682 skel = parse_seq(lens, state);
1685 skel = parse_counter(lens, state);
1688 skel = parse_concat(lens, state, dict);
1691 skel = parse_union(lens, state, dict);
1694 skel = parse_subtree(lens, state, dict);
1697 skel = parse_quant_star(lens, state, dict);
1700 skel = parse_quant_maybe(lens, state, dict);
1703 skel = parse_square(lens, state, dict);
1706 BUG_ON(true, state->info, "illegal lens tag %d", lens->tag);
1713 struct skel *lns_parse(struct lens *lens, const char *text, struct dict **dict,
1714 struct lns_error **err) {
1716 struct skel *skel = NULL;
1717 uint size = strlen(text);
1721 r = ALLOC(state.info);
1722 ERR_NOMEM(r< 0, lens->info);
1723 state.info->ref = UINT_MAX;
1724 state.info->error = lens->info->error;
1729 partial = init_regs(&state, lens, size);
1732 if (lens->recursive)
1733 skel = parse_rec(lens, &state, dict);
1735 skel = parse_lens(lens, &state, dict);
1737 free_seqs(state.seqs);
1738 if (state.error != NULL) {
1744 if (state.key != NULL) {
1745 get_error(&state, lens, "parse left unused key %s", state.key);
1748 if (state.value != NULL) {
1749 get_error(&state, lens, "parse left unused value %s", state.value);
1753 // This should never happen during lns_parse
1754 get_error(&state, lens, "parse can not process entire input");
1763 free_lns_error(state.error);
1770 * indent-tabs-mode: nil