4 * Copyright (C) 2007-2016 David Lutterkort
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * Author: David Lutterkort <dlutter@redhat.com>
31 /* This enum must be kept in sync with type_offs and ntypes */
33 CTYPE, ATYPE, KTYPE, VTYPE
36 static const int type_offs[] = {
37 offsetof(struct lens, ctype),
38 offsetof(struct lens, atype),
39 offsetof(struct lens, ktype),
40 offsetof(struct lens, vtype)
42 static const int ntypes = sizeof(type_offs)/sizeof(type_offs[0]);
44 static const char *lens_type_names[] =
45 { "ctype", "atype", "ktype", "vtype" };
47 #define ltype(lns, t) *((struct regexp **) ((char *) lns + type_offs[t]))
49 static struct value * typecheck_union(struct info *,
50 struct lens *l1, struct lens *l2);
51 static struct value *typecheck_concat(struct info *,
52 struct lens *l1, struct lens *l2);
53 static struct value *typecheck_square(struct info *,
54 struct lens *l1, struct lens *l2);
55 static struct value *typecheck_iter(struct info *info, struct lens *l);
56 static struct value *typecheck_maybe(struct info *info, struct lens *l);
58 /* Lens names for pretty printing */
59 /* keep order in sync with enum type */
60 static const char *const tags[] = {
61 "del", "store", "value", "key", "label", "seq", "counter",
63 "subtree", "star", "maybe", "rec", "square"
66 #define ltag(lens) (tags[lens->tag - L_DEL])
68 static const struct string digits_string = {
69 .ref = REF_MAX, .str = (char *) "[0123456789]+"
71 static const struct string *const digits_pat = &digits_string;
73 char *format_lens(struct lens *l) {
75 return strdup("(no lens)");
78 char *inf = format_info(l->info);
81 xasprintf(&result, "%s[%s]%s", tags[l->tag - L_DEL], inf,
82 l->recursive ? "R" : "r");
87 #define BUG_LENS_TAG(lns) bug_lens_tag(lns, __FILE__, __LINE__)
89 static void bug_lens_tag(struct lens *lens, const char *file, int lineno) {
90 if (lens != NULL && lens->info != NULL && lens->info->error != NULL) {
91 char *s = format_lens(lens);
92 bug_on(lens->info->error, file, lineno, "Unexpected lens tag %s", s);
95 /* We are really screwed */
101 /* Construct a finite automaton from REGEXP and return it in *FA.
103 * Return NULL if REGEXP is valid, if the regexp REGEXP has syntax errors,
104 * return an exception.
106 static struct value *str_to_fa(struct info *info, const char *pattern,
107 struct fa **fa, int nocase) {
109 struct value *exn = NULL;
111 char *re_str = NULL, *re_err = NULL;
114 error = fa_compile(pattern, strlen(pattern), fa);
115 if (error == REG_NOERROR) {
117 error = fa_nocase(*fa);
118 ERR_NOMEM(error < 0, info);
123 re_str = escape(pattern, -1, RX_ESCAPES);
124 ERR_NOMEM(re_str == NULL, info);
126 exn = make_exn_value(info, "Invalid regular expression /%s/", re_str);
128 re_err_len = regerror(error, NULL, NULL, 0);
129 error = ALLOC_N(re_err, re_err_len);
130 ERR_NOMEM(error < 0, info);
132 regerror(error, NULL, re_err, re_err_len);
133 exn_printf_line(exn, "%s", re_err);
142 exn = info->error->exn;
146 static struct value *regexp_to_fa(struct regexp *regexp, struct fa **fa) {
147 return str_to_fa(regexp->info, regexp->pattern->str, fa, regexp->nocase);
150 static struct lens *make_lens(enum lens_tag tag, struct info *info) {
159 static struct lens *make_lens_unop(enum lens_tag tag, struct info *info,
160 struct lens *child) {
161 struct lens *lens = make_lens(tag, info);
163 lens->value = child->value;
164 lens->key = child->key;
168 typedef struct regexp *regexp_combinator(struct info *, int, struct regexp **);
170 static struct lens *make_lens_binop(enum lens_tag tag, struct info *info,
171 struct lens *l1, struct lens *l2,
172 regexp_combinator *combinator) {
173 struct lens *lens = make_lens(tag, info);
174 int n1 = (l1->tag == tag) ? l1->nchildren : 1;
175 struct regexp **types = NULL;
180 lens->nchildren = n1;
181 lens->nchildren += (l2->tag == tag) ? l2->nchildren : 1;
183 lens->recursive = l1->recursive || l2->recursive;
184 lens->rec_internal = l1->rec_internal || l2->rec_internal;
186 if (ALLOC_N(lens->children, lens->nchildren) < 0) {
191 if (l1->tag == tag) {
192 for (int i=0; i < l1->nchildren; i++)
193 lens->children[i] = ref(l1->children[i]);
196 lens->children[0] = l1;
199 if (l2->tag == tag) {
200 for (int i=0; i < l2->nchildren; i++)
201 lens->children[n1 + i] = ref(l2->children[i]);
204 lens->children[n1] = l2;
207 for (int i=0; i < lens->nchildren; i++) {
208 lens->value = lens->value || lens->children[i]->value;
209 lens->key = lens->key || lens->children[i]->key;
212 if (ALLOC_N(types, lens->nchildren) < 0)
215 if (! lens->rec_internal) {
216 /* Inside a recursive lens, we assign types with lns_check_rec
217 * once we know the entire lens */
218 for (int t=0; t < ntypes; t++) {
219 if (lens->recursive && t == CTYPE)
221 for (int i=0; i < lens->nchildren; i++)
222 types[i] = ltype(lens->children[i], t);
223 ltype(lens, t) = (*combinator)(info, lens->nchildren, types);
228 for (int i=0; i < lens->nchildren; i++)
229 ensure(tag != lens->children[i]->tag, lens->info);
238 static struct value *make_lens_value(struct lens *lens) {
240 v = make_value(V_LENS, ref(lens->info));
245 struct value *lns_make_union(struct info *info,
246 struct lens *l1, struct lens *l2, int check) {
247 struct lens *lens = NULL;
248 int consumes_value = l1->consumes_value && l2->consumes_value;
249 int recursive = l1->recursive || l2->recursive;
250 int ctype_nullable = l1->ctype_nullable || l2->ctype_nullable;
253 struct value *exn = typecheck_union(info, l1, l2);
258 lens = make_lens_binop(L_UNION, info, l1, l2, regexp_union_n);
259 lens->consumes_value = consumes_value;
261 lens->ctype_nullable = ctype_nullable;
262 return make_lens_value(lens);
265 struct value *lns_make_concat(struct info *info,
266 struct lens *l1, struct lens *l2, int check) {
267 struct lens *lens = NULL;
268 int consumes_value = l1->consumes_value || l2->consumes_value;
269 int recursive = l1->recursive || l2->recursive;
270 int ctype_nullable = l1->ctype_nullable && l2->ctype_nullable;
273 struct value *exn = typecheck_concat(info, l1, l2);
277 if (l1->value && l2->value) {
278 return make_exn_value(info, "Multiple stores in concat");
280 if (l1->key && l2->key) {
281 return make_exn_value(info, "Multiple keys/labels in concat");
284 lens = make_lens_binop(L_CONCAT, info, l1, l2, regexp_concat_n);
285 lens->consumes_value = consumes_value;
287 lens->ctype_nullable = ctype_nullable;
288 return make_lens_value(lens);
291 static struct regexp *subtree_atype(struct info *info,
292 struct regexp *ktype,
293 struct regexp *vtype) {
294 const char *kpat = (ktype == NULL) ? ENC_NULL : ktype->pattern->str;
295 const char *vpat = (vtype == NULL) ? ENC_NULL : vtype->pattern->str;
297 struct regexp *result = NULL;
298 char *ks = NULL, *vs = NULL;
301 if (ktype != NULL && vtype != NULL && ktype->nocase != vtype->nocase) {
302 ks = regexp_expand_nocase(ktype);
303 vs = regexp_expand_nocase(vtype);
304 ERR_NOMEM(ks == NULL || vs == NULL, info);
305 if (asprintf(&pat, "(%s)%s(%s)%s", ks, ENC_EQ, vs, ENC_SLASH) < 0)
306 ERR_NOMEM(true, info);
309 if (asprintf(&pat, "(%s)%s(%s)%s", kpat, ENC_EQ, vpat, ENC_SLASH) < 0)
310 ERR_NOMEM(pat == NULL, info);
314 nocase = ktype->nocase;
315 else if (vtype != NULL)
316 nocase = vtype->nocase;
318 result = make_regexp(info, pat, nocase);
326 * A subtree lens l1 = [ l ]
328 * Types are assigned as follows:
330 * l1->ctype = l->ctype
331 * l1->atype = encode(l->ktype, l->vtype)
335 struct value *lns_make_subtree(struct info *info, struct lens *l) {
338 lens = make_lens_unop(L_SUBTREE, info, l);
339 lens->ctype = ref(l->ctype);
341 lens->atype = subtree_atype(info, l->ktype, l->vtype);
342 lens->value = lens->key = 0;
343 lens->recursive = l->recursive;
344 lens->rec_internal = l->rec_internal;
346 lens->ctype_nullable = l->ctype_nullable;
347 return make_lens_value(lens);
350 struct value *lns_make_star(struct info *info, struct lens *l, int check) {
354 struct value *exn = typecheck_iter(info, l);
359 return make_exn_value(info, "Multiple stores in iteration");
362 return make_exn_value(info, "Multiple keys/labels in iteration");
365 lens = make_lens_unop(L_STAR, info, l);
366 for (int t = 0; t < ntypes; t++) {
367 ltype(lens, t) = regexp_iter(info, ltype(l, t), 0, -1);
369 lens->recursive = l->recursive;
370 lens->rec_internal = l->rec_internal;
371 lens->ctype_nullable = 1;
372 return make_lens_value(lens);
375 struct value *lns_make_plus(struct info *info, struct lens *l, int check) {
376 struct value *star, *conc;
378 star = lns_make_star(info, l, check);
382 conc = lns_make_concat(ref(info), ref(l), ref(star->lens), check);
387 struct value *lns_make_maybe(struct info *info, struct lens *l, int check) {
391 struct value *exn = typecheck_maybe(info, l);
395 lens = make_lens_unop(L_MAYBE, info, l);
396 for (int t=0; t < ntypes; t++)
397 ltype(lens, t) = regexp_maybe(info, ltype(l, t));
398 lens->value = l->value;
400 lens->recursive = l->recursive;
401 lens->rec_internal = l->rec_internal;
402 lens->ctype_nullable = 1;
403 return make_lens_value(lens);
406 /* The ctype of SQR is a regular approximation of the true ctype of SQR
407 * at this point. In some situations, for example in processing quoted
408 * strings this leads to false typecheck errors; to lower the chances
409 * of these, we try to construct the precise ctype of SQR if the
410 * language of L1 is finite (and has a small number of words)
412 static void square_precise_type(struct info *info,
415 struct regexp *body) {
419 struct fa *fa = NULL;
420 struct value *exn = NULL;
421 struct regexp **u = NULL, *c[3], *w = NULL;
423 exn = str_to_fa(info, left->pattern->str, &fa, left->nocase);
427 nwords = fa_enumerate(fa, 10, &words); /* The limit of 10 is arbitrary */
431 r = ALLOC_N(u, nwords);
432 ERR_NOMEM(r < 0, info);
435 for (int i=0; i < nwords; i++) {
436 w = make_regexp_literal(left->info, words[i]);
437 ERR_NOMEM(w == NULL, info);
438 w->nocase = left->nocase;
441 u[i] = regexp_concat_n(info, 3, c);
444 ERR_NOMEM(u[i] == NULL, info);
446 w = regexp_union_n(info, nwords, u);
455 for (int i=0; i < nwords; i++) {
466 /* Build a square lens as
467 * left . body . right
468 * where left and right accepts the same language and
469 * captured strings must match. The inability to express this with other
470 * lenses makes the square primitive necessary.
472 struct value * lns_make_square(struct info *info, struct lens *l1,
473 struct lens *l2, struct lens *l3, int check) {
474 struct value *cnt1 = NULL, *cnt2 = NULL, *res = NULL;
475 struct lens *sqr = NULL;
477 /* supported types: L_KEY . body . L_DEL or L_DEL . body . L_DEL */
478 if (l3->tag != L_DEL || (l1->tag != L_DEL && l1->tag != L_KEY))
479 return make_exn_value(info, "Supported types: (key lns del) or (del lns del)");
481 res = typecheck_square(info, l1, l3);
485 res = lns_make_concat(ref(info), ref(l1), ref(l2), check);
489 res = lns_make_concat(ref(info), ref(cnt1->lens), ref(l3), check);
494 sqr = make_lens_unop(L_SQUARE, ref(info), ref(cnt2->lens));
495 ERR_NOMEM(sqr == NULL, info);
497 for (int t=0; t < ntypes; t++)
498 ltype(sqr, t) = ref(ltype(cnt2->lens, t));
500 square_precise_type(info, &(sqr->ctype), l1->ctype, l2->ctype);
502 sqr->recursive = cnt2->lens->recursive;
503 sqr->rec_internal = cnt2->lens->rec_internal;
504 sqr->consumes_value = cnt2->lens->consumes_value;
506 res = make_lens_value(sqr);
507 ERR_NOMEM(res == NULL, info);
525 static struct regexp *make_regexp_from_string(struct info *info,
526 struct string *string) {
531 r->pattern = ref(string);
537 static struct regexp *restrict_regexp(struct regexp *r) {
539 struct regexp *result = NULL;
543 ret = fa_restrict_alphabet(r->pattern->str, strlen(r->pattern->str),
545 RESERVED_FROM_CH, RESERVED_TO_CH);
546 ERR_NOMEM(ret == REG_ESPACE || ret < 0, r->info);
547 BUG_ON(ret != 0, r->info, NULL);
548 ensure(nre_len == strlen(nre), r->info);
550 ret = regexp_c_locale(&nre, &nre_len);
551 ERR_NOMEM(ret < 0, r->info);
553 result = make_regexp(r->info, nre, r->nocase);
555 BUG_ON(regexp_compile(result) != 0, r->info,
556 "Could not compile restricted regexp");
561 unref(result, regexp);
565 static struct value *
566 typecheck_prim(enum lens_tag tag, struct info *info,
567 struct regexp *regexp, struct string *string) {
568 struct fa *fa_slash = NULL;
569 struct fa *fa_key = NULL;
570 struct fa *fa_isect = NULL;
571 struct value *exn = NULL;
575 exn = str_to_fa(info, "(.|\n)*/(.|\n)*", &fa_slash, regexp->nocase);
579 exn = regexp_to_fa(regexp, &fa_key);
583 fa_isect = fa_intersect(fa_slash, fa_key);
584 if (! fa_is_basic(fa_isect, FA_EMPTY)) {
585 exn = make_exn_value(info,
586 "The key regexp /%s/ matches a '/'", regexp->pattern->str);
592 fa_isect = fa_key = fa_slash = NULL;
593 } else if (tag == L_LABEL) {
594 if (strchr(string->str, SEP) != NULL) {
595 exn = make_exn_value(info,
596 "The label string \"%s\" contains a '/'", string->str);
599 } else if (tag == L_DEL && string != NULL) {
601 const char *dflt = string->str;
602 cnt = regexp_match(regexp, dflt, strlen(dflt), 0, NULL);
603 if (cnt != strlen(dflt)) {
604 char *s = escape(dflt, -1, RX_ESCAPES);
605 char *r = regexp_escape(regexp);
606 exn = make_exn_value(info,
607 "del: the default value '%s' does not match /%s/", s, r);
621 struct value *lns_make_prim(enum lens_tag tag, struct info *info,
622 struct regexp *regexp, struct string *string) {
623 struct lens *lens = NULL;
624 struct value *exn = NULL;
626 if (typecheck_p(info)) {
627 exn = typecheck_prim(tag, info, regexp, string);
632 /* Build the actual lens */
633 lens = make_lens(tag, info);
634 lens->regexp = regexp;
635 lens->string = string;
636 lens->key = (tag == L_KEY || tag == L_LABEL || tag == L_SEQ);
637 lens->value = (tag == L_STORE || tag == L_VALUE);
638 lens->consumes_value = (tag == L_STORE || tag == L_VALUE);
639 lens->atype = regexp_make_empty(info);
641 if (tag == L_DEL || tag == L_STORE || tag == L_KEY) {
642 lens->ctype = ref(regexp);
643 lens->ctype_nullable = regexp_matches_empty(lens->ctype);
644 } else if (tag == L_LABEL || tag == L_VALUE
645 || tag == L_SEQ || tag == L_COUNTER) {
646 lens->ctype = regexp_make_empty(info);
647 lens->ctype_nullable = 1;
657 make_regexp_from_string(info, (struct string *) digits_pat);
658 if (lens->ktype == NULL)
660 } else if (tag == L_KEY) {
661 lens->ktype = restrict_regexp(lens->regexp);
662 } else if (tag == L_LABEL) {
663 lens->ktype = make_regexp_literal(info, lens->string->str);
664 if (lens->ktype == NULL)
669 if (tag == L_STORE) {
670 lens->vtype = restrict_regexp(lens->regexp);
671 } else if (tag == L_VALUE) {
672 lens->vtype = make_regexp_literal(info, lens->string->str);
675 return make_lens_value(lens);
681 * Typechecking of lenses
683 static struct value *disjoint_check(struct info *info, bool is_get,
684 struct regexp *r1, struct regexp *r2) {
685 struct fa *fa1 = NULL;
686 struct fa *fa2 = NULL;
687 struct fa *fa = NULL;
688 struct value *exn = NULL;
689 const char *const msg = is_get ? "union.get" : "tree union.put";
691 if (r1 == NULL || r2 == NULL)
694 exn = regexp_to_fa(r1, &fa1);
698 exn = regexp_to_fa(r2, &fa2);
702 fa = fa_intersect(fa1, fa2);
703 if (! fa_is_basic(fa, FA_EMPTY)) {
706 fa_example(fa, &xmpl, &xmpl_len);
708 char *fmt = enc_format(xmpl, xmpl_len);
714 exn = make_exn_value(ref(info),
715 "overlapping lenses in %s", msg);
718 exn_printf_line(exn, "Example matched by both: '%s'", xmpl);
720 exn_printf_line(exn, "Example matched by both: %s", xmpl);
732 static struct value *typecheck_union(struct info *info,
733 struct lens *l1, struct lens *l2) {
734 struct value *exn = NULL;
736 exn = disjoint_check(info, true, l1->ctype, l2->ctype);
738 exn = disjoint_check(info, false, l1->atype, l2->atype);
741 char *fi = format_info(l1->info);
742 exn_printf_line(exn, "First lens: %s", fi);
745 fi = format_info(l2->info);
746 exn_printf_line(exn, "Second lens: %s", fi);
752 static struct value *
753 ambig_check(struct info *info, struct fa *fa1, struct fa *fa2,
754 enum lens_type typ, struct lens *l1, struct lens *l2,
755 const char *msg, bool iterated) {
758 struct value *exn = NULL;
761 r = fa_ambig_example(fa1, fa2, &upv, &upv_len, &pv, &v);
763 exn = make_exn_value(ref(info), "not enough memory");
767 ERR_REPORT(info, AUG_ENOMEM, NULL);
768 return info->error->exn;
773 char *e_u, *e_up, *e_upv, *e_pv, *e_v;
777 e_u = enc_format(upv, pv - upv);
778 e_up = enc_format(upv, v - upv);
779 e_upv = enc_format(upv, upv_len);
780 e_pv = enc_format(pv, strlen(pv));
781 e_v = enc_format(v, strlen(v));
782 lns_format_atype(l1, &s1);
783 lns_format_atype(l2, &s2);
785 e_u = escape(upv, pv - upv, RX_ESCAPES);
786 e_up = escape(upv, v - upv, RX_ESCAPES);
787 e_upv = escape(upv, -1, RX_ESCAPES);
788 e_pv = escape(pv, -1, RX_ESCAPES);
789 e_v = escape(v, -1, RX_ESCAPES);
790 s1 = regexp_escape(ltype(l1, typ));
791 s2 = regexp_escape(ltype(l2, typ));
793 exn = make_exn_value(ref(info), "%s", msg);
795 exn_printf_line(exn, " Iterated regexp: /%s/", s1);
797 exn_printf_line(exn, " First regexp: /%s/", s1);
798 exn_printf_line(exn, " Second regexp: /%s/", s2);
800 exn_printf_line(exn, " '%s' can be split into", e_upv);
801 exn_printf_line(exn, " '%s|=|%s'\n", e_u, e_pv);
802 exn_printf_line(exn, " and");
803 exn_printf_line(exn, " '%s|=|%s'\n", e_up, e_v);
816 static struct value *
817 ambig_concat_check(struct info *info, const char *msg,
818 enum lens_type typ, struct lens *l1, struct lens *l2) {
819 struct fa *fa1 = NULL;
820 struct fa *fa2 = NULL;
821 struct value *result = NULL;
822 struct regexp *r1 = ltype(l1, typ);
823 struct regexp *r2 = ltype(l2, typ);
825 if (r1 == NULL || r2 == NULL)
828 result = regexp_to_fa(r1, &fa1);
832 result = regexp_to_fa(r2, &fa2);
836 result = ambig_check(info, fa1, fa2, typ, l1, l2, msg, false);
843 static struct value *typecheck_concat(struct info *info,
844 struct lens *l1, struct lens *l2) {
845 struct value *result = NULL;
847 result = ambig_concat_check(info, "ambiguous concatenation",
849 if (result == NULL) {
850 result = ambig_concat_check(info, "ambiguous tree concatenation",
853 if (result != NULL) {
854 char *fi = format_info(l1->info);
855 exn_printf_line(result, "First lens: %s", fi);
857 fi = format_info(l2->info);
858 exn_printf_line(result, "Second lens: %s", fi);
864 static struct value *make_exn_square(struct info *info, struct lens *l1,
865 struct lens *l2, const char *msg) {
868 struct value *exn = make_exn_value(ref(info), "%s",
869 "Inconsistency in lens square");
870 exn_printf_line(exn, "%s", msg);
871 fi = format_info(l1->info);
872 exn_printf_line(exn, "Left lens: %s", fi);
874 fi = format_info(l2->info);
875 exn_printf_line(exn, "Right lens: %s", fi);
880 static struct value *typecheck_square(struct info *info, struct lens *l1,
883 struct value *exn = NULL;
884 struct fa *fa1 = NULL, *fa2 = NULL;
885 struct regexp *r1 = ltype(l1, CTYPE);
886 struct regexp *r2 = ltype(l2, CTYPE);
888 if (r1 == NULL || r2 == NULL)
891 exn = regexp_to_fa(r1, &fa1);
895 exn = regexp_to_fa(r2, &fa2);
899 r = fa_equals(fa1, fa2);
902 exn = make_exn_value(ref(info), "not enough memory");
906 ERR_REPORT(info, AUG_ENOMEM, NULL);
907 return info->error->exn;;
912 exn = make_exn_square(info, l1, l2,
913 "Left and right lenses must accept the same language");
917 /* check del create consistency */
918 if (l1->tag == L_DEL && l2->tag == L_DEL) {
919 if (!STREQ(l1->string->str, l2->string->str)) {
920 exn = make_exn_square(info, l1, l2,
921 "Left and right lenses must have the same default value");
932 static struct value *
933 ambig_iter_check(struct info *info, const char *msg,
934 enum lens_type typ, struct lens *l) {
935 struct fa *fas = NULL, *fa = NULL;
936 struct value *result = NULL;
937 struct regexp *r = ltype(l, typ);
942 result = regexp_to_fa(r, &fa);
946 fas = fa_iter(fa, 0, -1);
948 result = ambig_check(info, fa, fas, typ, l, l, msg, true);
956 static struct value *typecheck_iter(struct info *info, struct lens *l) {
957 struct value *result = NULL;
959 result = ambig_iter_check(info, "ambiguous iteration", CTYPE, l);
960 if (result == NULL) {
961 result = ambig_iter_check(info, "ambiguous tree iteration", ATYPE, l);
963 if (result != NULL) {
964 char *fi = format_info(l->info);
965 exn_printf_line(result, "Iterated lens: %s", fi);
971 static struct value *typecheck_maybe(struct info *info, struct lens *l) {
972 /* Check (r)? as (<e>|r) where <e> is the empty language */
973 struct value *exn = NULL;
975 if (l->ctype != NULL && regexp_matches_empty(l->ctype)) {
976 exn = make_exn_value(ref(info),
977 "illegal optional expression: /%s/ matches the empty word",
978 l->ctype->pattern->str);
981 /* Typecheck the put direction; the check passes if
982 (1) the atype does not match the empty string, because we can tell
983 from looking at tree nodes whether L should be applied or not
984 (2) L handles a value; with that, we know whether to apply L or not
985 depending on whether the current node has a non NULL value or not
987 if (exn == NULL && ! l->consumes_value) {
988 if (l->atype != NULL && regexp_matches_empty(l->atype)) {
989 exn = make_exn_value(ref(info),
990 "optional expression matches the empty tree but does not consume a value");
996 void free_lens(struct lens *lens) {
999 ensure(lens->ref == 0, lens->info);
1001 if (debugging("lenses"))
1002 dump_lens_tree(lens);
1003 switch (lens->tag) {
1005 unref(lens->regexp, regexp);
1006 unref(lens->string, string);
1010 unref(lens->regexp, regexp);
1016 unref(lens->string, string);
1022 unref(lens->child, lens);
1026 for (int i=0; i < lens->nchildren; i++)
1027 unref(lens->children[i], lens);
1028 free(lens->children);
1031 if (!lens->rec_internal) {
1032 unref(lens->body, lens);
1040 for (int t=0; t < ntypes; t++)
1041 unref(ltype(lens, t), regexp);
1043 unref(lens->info, info);
1044 jmt_free(lens->jmt);
1050 void lens_release(struct lens *lens) {
1054 for (int t=0; t < ntypes; t++)
1055 regexp_release(ltype(lens, t));
1057 if (lens->tag == L_KEY || lens->tag == L_STORE)
1058 regexp_release(lens->regexp);
1060 if (lens->tag == L_SUBTREE || lens->tag == L_STAR
1061 || lens->tag == L_MAYBE || lens->tag == L_SQUARE) {
1062 lens_release(lens->child);
1065 if (lens->tag == L_UNION || lens->tag == L_CONCAT) {
1066 for (int i=0; i < lens->nchildren; i++) {
1067 lens_release(lens->children[i]);
1071 if (lens->tag == L_REC && !lens->rec_internal) {
1072 lens_release(lens->body);
1075 jmt_free(lens->jmt);
1080 * Encoding of tree levels
1082 char *enc_format(const char *e, size_t len) {
1083 return enc_format_indent(e, len, 0);
1086 char *enc_format_indent(const char *e, size_t len, int indent) {
1088 char *result = NULL, *r;
1091 while (*k && k - e < len) {
1092 char *eq, *slash, *v;
1093 eq = strchr(k, ENC_EQ_CH);
1095 slash = strchr(eq, ENC_SLASH_CH);
1096 assert(slash != NULL);
1101 size += 6; /* Surrounding braces */
1103 size += 1 + (eq - k) + 1;
1105 size += 4 + (slash - v) + 1;
1108 if (ALLOC_N(result, size + 1) < 0)
1113 while (*k && k - e < len) {
1114 char *eq, *slash, *v;
1115 eq = strchr(k, ENC_EQ_CH);
1116 slash = strchr(eq, ENC_SLASH_CH);
1117 assert(eq != NULL && slash != NULL);
1120 for (int i=0; i < indent; i++)
1122 r = stpcpy(r, " { ");
1124 r = stpcpy(r, "\"");
1125 r = stpncpy(r, k, eq - k);
1126 r = stpcpy(r, "\"");
1129 r = stpcpy (r, " = \"");
1130 r = stpncpy(r, v, slash - v);
1131 r = stpcpy(r, "\"");
1133 r = stpcpy(r, " }");
1141 static int format_atype(struct lens *l, char **buf, uint indent);
1143 static int format_indent(char **buf, uint indent) {
1144 if (ALLOC_N(*buf, indent+1) < 0)
1146 memset(*buf, ' ', indent);
1150 static int format_subtree_atype(struct lens *l, char **buf, uint indent) {
1151 char *k = NULL, *v = NULL;
1152 const struct regexp *ktype = l->child->ktype;
1153 const struct regexp *vtype = l->child->vtype;
1157 if (format_indent(&si, indent) < 0)
1160 if (ktype != NULL) {
1161 k = regexp_escape(ktype);
1165 if (vtype != NULL) {
1166 v = regexp_escape(vtype);
1170 r = xasprintf(buf, "%s{ = /%s/ }", si, k, v);
1172 r = xasprintf(buf, "%s{ /%s/ = /%s/ }", si, k, v);
1175 r = xasprintf(buf, "%s{ }", si, k);
1177 r = xasprintf(buf, "%s{ /%s/ }", si, k);
1190 static int format_rep_atype(struct lens *l, char **buf,
1191 uint indent, char quant) {
1195 r = format_atype(l->child, &a, indent);
1198 if (strlen(a) == 0) {
1205 if (l->child->tag == L_CONCAT || l->child->tag == L_UNION)
1206 r = xasprintf(buf, "(%s)%c", a, quant);
1208 r = xasprintf(buf, "%s%c", a, quant);
1219 static int format_concat_atype(struct lens *l, char **buf, uint indent) {
1220 char **c = NULL, *s = NULL, *p;
1222 size_t len = 0, nconc = 0;
1224 if (ALLOC_N(c, l->nchildren) < 0)
1227 for (int i=0; i < l->nchildren; i++) {
1228 r = format_atype(l->children[i], c+i, indent);
1231 len += strlen(c[i]) + 3;
1232 if (strlen(c[i]) > 0)
1234 if (l->children[i]->tag == L_UNION)
1238 if (ALLOC_N(s, len+1) < 0)
1241 for (int i=0; i < l->nchildren; i++) {
1242 bool needs_parens = nconc > 1 && l->children[i]->tag == L_UNION;
1243 if (strlen(c[i]) == 0)
1249 for (int j=0; j < indent; j++)
1263 for (int i=0; i < l->nchildren; i++)
1270 static int format_union_atype(struct lens *l, char **buf, uint indent) {
1271 char **c = NULL, *s = NULL, *p;
1275 if (ALLOC_N(c, l->nchildren) < 0)
1278 /* Estimate the length of the string we will build. The calculation
1279 overestimates that length so that the logic is a little simpler than
1280 in the loop where we actually build the string */
1281 for (int i=0; i < l->nchildren; i++) {
1282 r = format_atype(l->children[i], c+i, indent + 2);
1285 /* We will add c[i] and some fixed characters */
1286 len += strlen(c[i]) + strlen("\n| ()");
1287 if (strlen(c[i]) < indent+2) {
1288 /* We will add indent+2 whitespace */
1293 if (ALLOC_N(s, len+1) < 0)
1297 for (int i=0; i < l->nchildren; i++) {
1301 if (strlen(t) >= indent+2) {
1302 /* c[i] is not just whitespace */
1303 p = stpncpy(p, t, indent+2);
1306 /* c[i] is just whitespace, make sure we indent the
1307 '|' appropriately */
1308 memset(p, ' ', indent+2);
1311 p = stpcpy(p, "| ");
1313 /* Skip additional indent */
1317 p = stpcpy(p, "()");
1326 for (int i=0; i < l->nchildren; i++)
1333 static int format_rec_atype(struct lens *l, char **buf, uint indent) {
1336 if (l->rec_internal) {
1337 *buf = strdup("<<rec>>");
1338 return (*buf == NULL) ? -1 : 0;
1342 r = format_atype(l->body, &c, indent);
1345 r = xasprintf(buf, "<<rec:%s>>", c);
1347 return (r < 0) ? -1 : 0;
1350 static int format_atype(struct lens *l, char **buf, uint indent) {
1362 return (*buf == NULL) ? -1 : 0;
1365 return format_subtree_atype(l, buf, indent);
1368 return format_rep_atype(l, buf, indent, '*');
1371 return format_rep_atype(l, buf, indent, '?');
1374 return format_concat_atype(l, buf, indent);
1377 return format_union_atype(l, buf, indent);
1380 return format_rec_atype(l, buf, indent);
1383 return format_concat_atype(l->child, buf, indent);
1392 int lns_format_atype(struct lens *l, char **buf) {
1394 r = format_atype(l, buf, 4);
1401 struct value *lns_make_rec(struct info *info) {
1402 struct lens *l = make_lens(L_REC, info);
1404 l->rec_internal = 1;
1406 return make_lens_value(l);
1409 /* Transform a recursive lens into a recursive transition network
1411 * First, we transform the lens into context free grammar, considering any
1412 * nonrecursive lens as a terminal
1414 * cfg: lens -> nonterminal -> production list
1416 * cfg(primitive, N) -> N := regexp(primitive)
1417 * cfg(l1 . l2, N) -> N := N1 . N2 + cfg(l1, N1) + cfg(l2, N2)
1418 * cfg(l1 | l2, N) -> N := N1 | N2 + cfg(l1, N1) + cfg(l2, N2)
1419 * cfg(l*, N) -> N := N . N' | eps + cfg(l, N')
1420 * cfg([ l ], N) -> N := N' + cfg(l, N')
1422 * We use the lenses as nonterminals themselves; this also means that our
1423 * productions are normalized such that the RHS is either a terminal
1424 * (regexp) or entirely consists of nonterminals
1426 * In a few places, we need to know that a nonterminal corresponds to a
1427 * subtree combinator ([ l ]); this is the main reason that the rule (cfg[
1428 * l ], N) introduces a useless production N := N'.
1430 * Computing the types for a recursive lens r is (fairly) straightforward,
1431 * given the above grammar, which we convert to an automaton following
1432 * http://arxiv.org/abs/cs/9910022; the only complication arises from the
1433 * subtree combinator, since it can be used in recursive lenses to
1434 * construct trees of arbitrary depth, but we need to approximate the types
1435 * of r in a way that fits with our top-down tree automaton in put.c.
1437 * To handle subtree combinators, remember that the type rules for a lens
1443 * m.atype = enc(l.ktype, l.vtype)
1444 * ( enc is a function regexp -> regexp -> regexp)
1446 * We compute types for r by modifying its automaton according to
1447 * Nederhof's paper and reducing it to a regular expression of lenses. This
1448 * has to happen in the following steps:
1449 * r.ktype : approximate by using [ .. ].ktype = NULL
1450 * r.vtype : same as r.ktype
1451 * r.ctype : approximate by treating [ l ] as l
1452 * r.atype : approximate by using r.ktype and r.vtype from above
1453 * in lens expressions [ f(r) ]
1456 /* Transitions go to a state and are labeled with a lens. For epsilon
1457 * transitions, lens may be NULL. When lens is a simple (nonrecursive
1458 * lens), PROD will be NULL. When we modify the automaton to splice
1459 * nonterminals in, we remember the production for the nonterminal in PROD.
1468 struct state *next; /* Linked list for memory management */
1470 struct trans *trans;
1473 /* Productions for lens LENS. Start state START and end state END. If we
1474 start with START, END is the only accepting state. */
1477 struct state *start;
1481 /* A recursive transition network used to compute regular approximations
1487 struct state *states; /* Linked list through next of all states in all
1488 prods; the states for each production are on
1489 the part of the list from prod->start to
1492 enum lens_type lens_type;
1493 unsigned int check : 1;
1496 #define RTN_BAIL(rtn) if ((rtn)->exn != NULL || \
1497 (rtn)->info->error->code != AUG_NOERROR) \
1500 static void free_prod(struct prod *prod) {
1503 unref(prod->lens, lens);
1507 static void free_rtn(struct rtn *rtn) {
1510 for (int i=0; i < rtn->nprod; i++)
1511 free_prod(rtn->prod[i]);
1513 list_for_each(s, rtn->states) {
1514 for (int i=0; i < s->ntrans; i++) {
1515 unref(s->trans[i].lens, lens);
1516 unref(s->trans[i].re, regexp);
1520 list_free(rtn->states);
1521 unref(rtn->info, info);
1522 unref(rtn->exn, value);
1526 static struct state *add_state(struct prod *prod) {
1527 struct state *result = NULL;
1531 ERR_NOMEM(r < 0, prod->lens->info);
1533 list_cons(prod->start->next, result);
1538 static struct trans *add_trans(struct rtn *rtn, struct state *state,
1539 struct state *to, struct lens *l) {
1541 struct trans *result = NULL;
1543 for (int i=0; i < state->ntrans; i++)
1544 if (state->trans[i].to == to && state->trans[i].lens == l)
1545 return state->trans + i;
1547 r = REALLOC_N(state->trans, state->ntrans+1);
1548 ERR_NOMEM(r < 0, rtn->info);
1550 result = state->trans + state->ntrans;
1556 result->lens = ref(l);
1557 result->re = ref(ltype(l, rtn->lens_type));
1563 static struct prod *make_prod(struct rtn *rtn, struct lens *l) {
1564 struct prod *result = NULL;
1568 ERR_NOMEM(r < 0, l->info);
1570 result->lens = ref(l);
1571 r = ALLOC(result->start);
1572 ERR_NOMEM(r < 0, l->info);
1574 result->end = add_state(result);
1577 result->end->next = rtn->states;
1578 rtn->states = result->start;
1586 static struct prod *prod_for_lens(struct rtn *rtn, struct lens *l) {
1589 for (int i=0; i < rtn->nprod; i++) {
1590 if (rtn->prod[i]->lens == l)
1591 return rtn->prod[i];
1596 static void rtn_dot(struct rtn *rtn, const char *stage) {
1600 fp = debug_fopen("rtn_%s_%s.dot", stage, lens_type_names[rtn->lens_type]);
1604 fprintf(fp, "digraph \"l1\" {\n rankdir=LR;\n");
1605 list_for_each(s, rtn->states) {
1607 for (int p=0; p < rtn->nprod; p++) {
1608 if (s == rtn->prod[p]->start) {
1609 r = xasprintf(&label, "s%d", p);
1610 } else if (s == rtn->prod[p]->end) {
1611 r = xasprintf(&label, "e%d", p);
1613 ERR_NOMEM(r < 0, rtn->info);
1615 if (label == NULL) {
1616 r = xasprintf(&label, "%p", s);
1617 ERR_NOMEM(r < 0, rtn->info);
1619 fprintf(fp, " n%p [label = \"%s\"];\n", s, label == NULL ? "" : label);
1621 for (int i=0; i < s->ntrans; i++) {
1622 fprintf(fp, " n%p -> n%p", s, s->trans[i].to);
1623 if (s->trans[i].re != NULL) {
1624 label = regexp_escape(s->trans[i].re);
1625 for (char *t = label; *t; t++)
1628 fprintf(fp, " [ label = \"%s\" ]", label);
1639 /* Add transitions to RTN corresponding to cfg(l, N) */
1640 static void rtn_rules(struct rtn *rtn, struct lens *l) {
1644 struct prod *prod = prod_for_lens(rtn, l);
1648 int r = REALLOC_N(rtn->prod, rtn->nprod+1);
1649 ERR_NOMEM(r < 0, l->info);
1651 prod = make_prod(rtn, l);
1652 rtn->prod[rtn->nprod] = prod;
1656 struct state *start = prod->start;
1660 /* cfg(l1|..|ln, N) -> N := N1 | N2 | ... | Nn */
1661 for (int i=0; i < l->nchildren; i++) {
1662 add_trans(rtn, start, prod->end, l->children[i]);
1664 rtn_rules(rtn, l->children[i]);
1669 /* cfg(l1 . l2 ... ln, N) -> N := N1 . N2 ... Nn */
1670 for (int i=0; i < l->nchildren-1; i++) {
1671 struct state *s = add_state(prod);
1673 add_trans(rtn, start, s, l->children[i]);
1676 rtn_rules(rtn, l->children[i]);
1680 struct lens *c = l->children[l->nchildren - 1];
1681 add_trans(rtn, start, prod->end, c);
1688 /* cfg(l*, N) -> N := N . N' | eps */
1689 struct state *s = add_state(prod);
1691 add_trans(rtn, start, s, l);
1693 add_trans(rtn, s, prod->end, l->child);
1695 add_trans(rtn, start, prod->end, NULL);
1697 rtn_rules(rtn, l->child);
1702 switch (rtn->lens_type) {
1705 /* cfg([ l ], N) -> N := eps */
1706 add_trans(rtn, start, prod->end, NULL);
1709 /* cfg([ l ], N) -> N := N' plus cfg(l, N') */
1710 add_trans(rtn, start, prod->end, l->child);
1712 rtn_rules(rtn, l->child);
1716 /* At this point, we have propagated ktype and vtype */
1717 /* cfg([ l ], N) -> N := enc(l->ktype, l->vtype) */
1718 struct trans *t = add_trans(rtn, start, prod->end, NULL);
1720 t->re = subtree_atype(l->info, l->child->ktype, l->child->vtype);
1724 BUG_ON(true, rtn->info, "Unexpected lens type %d", rtn->lens_type);
1729 /* cfg(l?, N) -> N := N' | eps plus cfg(l, N') */
1730 add_trans(rtn, start, prod->end, l->child);
1732 add_trans(rtn, start, prod->end, NULL);
1734 rtn_rules(rtn, l->child);
1738 /* cfg(l, N) -> N := N' plus cfg(l->body, N') */
1739 add_trans(rtn, start, prod->end, l->body);
1741 rtn_rules(rtn, l->body);
1745 add_trans(rtn, start, prod->end, l->child);
1756 /* Replace transition t with two epsilon transitions s => p->start and
1757 * p->end => s->trans[i].to where s is the start of t. Instead of adding
1758 * epsilon transitions, we expand the epsilon transitions.
1760 static void prod_splice(struct rtn *rtn,
1761 struct prod *from, struct prod *to, struct trans *t) {
1763 add_trans(rtn, to->end, t->to, NULL);
1764 ERR_BAIL(from->lens->info);
1766 unref(t->re, regexp);
1772 static void rtn_splice(struct rtn *rtn, struct prod *prod) {
1773 for (struct state *s = prod->start; s != prod->end; s = s->next) {
1774 for (int i=0; i < s->ntrans; i++) {
1775 struct prod *p = prod_for_lens(rtn, s->trans[i].lens);
1777 prod_splice(rtn, prod, p, s->trans+i);
1786 static struct rtn *rtn_build(struct lens *rec, enum lens_type lt) {
1791 ERR_NOMEM(r < 0, rec->info);
1793 rtn->info = ref(rec->info);
1794 rtn->lens_type = lt;
1796 rtn_rules(rtn, rec);
1798 if (debugging("cf.approx"))
1799 rtn_dot(rtn, "10-rules");
1801 for (int i=0; i < rtn->nprod; i++) {
1802 rtn_splice(rtn, rtn->prod[i]);
1805 if (debugging("cf.approx"))
1806 rtn_dot(rtn, "11-splice");
1812 /* Compare transitions lexicographically by (to, lens) */
1813 static int trans_to_cmp(const void *v1, const void *v2) {
1814 const struct trans *t1 = v1;
1815 const struct trans *t2 = v2;
1817 if (t1->to != t2->to)
1818 return (t1->to < t2->to) ? -1 : 1;
1820 if (t1->lens == t2->lens)
1822 return (t1->lens < t2->lens) ? -1 : 1;
1825 /* Collapse a transition S1 -> S -> S2 by adding a transition S1 -> S2 with
1826 * lens R1 . (LOOP)* . R2 | R3 where R3 is the regexp on the possibly
1827 * existing transition S1 -> S2. If LOOP is NULL or R3 does not exist,
1828 * label the transition with a simplified regexp by treating NULL as
1830 static void collapse_trans(struct rtn *rtn,
1831 struct state *s1, struct state *s2,
1832 struct regexp *r1, struct regexp *loop,
1833 struct regexp *r2) {
1835 struct trans *t = NULL;
1836 struct regexp *r = NULL;
1838 for (int i=0; i < s1->ntrans; i++) {
1839 if (s1->trans[i].to == s2) {
1845 /* Set R = R1 . (LOOP)* . R2, treating NULL's as epsilon */
1849 else if (r2 == NULL)
1852 r = regexp_concat(rtn->info, r1, r2);
1854 struct regexp *s = regexp_iter(rtn->info, loop, 0, -1);
1855 ERR_NOMEM(s == NULL, rtn->info);
1856 struct regexp *c = NULL;
1861 c = regexp_concat(rtn->info, r1, s);
1863 ERR_NOMEM(c == NULL, rtn->info);
1869 r = regexp_concat(rtn->info, c, r2);
1871 ERR_NOMEM(r == NULL, rtn->info);
1876 t = add_trans(rtn, s1, s2, NULL);
1877 ERR_NOMEM(t == NULL, rtn->info);
1879 } else if (t->re == NULL) {
1880 if (r == NULL || regexp_matches_empty(r))
1883 t->re = regexp_maybe(rtn->info, r);
1885 ERR_NOMEM(t->re == NULL, rtn->info);
1887 } else if (r == NULL) {
1888 if (!regexp_matches_empty(t->re)) {
1889 r = regexp_maybe(rtn->info, t->re);
1890 unref(t->re, regexp);
1892 ERR_NOMEM(r == NULL, rtn->info);
1895 struct regexp *u = regexp_union(rtn->info, r, t->re);
1897 unref(t->re, regexp);
1899 ERR_NOMEM(u == NULL, rtn->info);
1904 rtn->exn = rtn->info->error->exn;
1908 /* Reduce the automaton with start state rprod->start and only accepting
1909 * state rprod->end so that we have a single transition rprod->start =>
1910 * rprod->end labelled with the overall approximating regexp for the
1913 * This is the same algorithm as fa_as_regexp in fa.c
1915 static struct regexp *rtn_reduce(struct rtn *rtn, struct lens *rec) {
1916 struct prod *prod = prod_for_lens(rtn, rec);
1919 ERR_THROW(prod == NULL, rtn->info, AUG_EINTERNAL,
1920 "No production for recursive lens");
1922 /* Eliminate epsilon transitions and turn transitions between the same
1923 * two states into a regexp union */
1924 list_for_each(s, rtn->states) {
1925 qsort(s->trans, s->ntrans, sizeof(*s->trans), trans_to_cmp);
1926 for (int i=0; i < s->ntrans; i++) {
1928 for (;j < s->ntrans && s->trans[i].to == s->trans[j].to;
1931 struct regexp *u, **v;
1932 r = ALLOC_N(v, j - i);
1933 ERR_NOMEM(r < 0, rtn->info);
1934 for (int k=i; k < j; k++)
1935 v[k-i] = s->trans[k].re;
1936 u = regexp_union_n(rtn->info, j - i, v);
1938 // FIXME: The calling convention for regexp_union_n
1939 // is bad, since we can't distinguish between alloc
1940 // failure and unioning all NULL's
1941 for (int k=0; k < j-i; k++)
1944 ERR_NOMEM(true, rtn->info);
1948 for (int k=i; k < j; k++) {
1949 unref(s->trans[k].lens, lens);
1950 unref(s->trans[k].re, regexp);
1953 MEMMOVE(s->trans + (i+1),
1956 s->ntrans -= j - (i + 1);
1961 /* Introduce new start and end states with epsilon transitions to/from
1962 * the old start and end states */
1963 struct state *end = NULL;
1964 struct state *start = NULL;
1965 if (ALLOC(start) < 0 || ALLOC(end) < 0) {
1968 ERR_NOMEM(true, rtn->info);
1970 list_insert_before(start, prod->start, rtn->states);
1971 end->next = prod->end->next;
1972 prod->end->next = end;
1974 add_trans(rtn, start, prod->start, NULL);
1976 add_trans(rtn, prod->end, end, NULL);
1979 prod->start = start;
1982 /* Eliminate states S (except for INI and FIN) one by one:
1983 * Let LOOP the regexp for the transition S -> S if it exists, epsilon
1985 * For all S1, S2 different from S with S1 -> S -> S2
1986 * Let R1 the regexp of S1 -> S
1987 * R2 the regexp of S -> S2
1988 * R3 the regexp of S1 -> S2 (or the regexp matching nothing
1989 * if no such transition)
1990 * set the regexp on the transition S1 -> S2 to
1991 * R1 . (LOOP)* . R2 | R3 */
1992 // FIXME: This does not go over all states
1993 list_for_each(s, rtn->states) {
1994 if (s == prod->end || s == prod->start)
1996 struct regexp *loop = NULL;
1997 for (int i=0; i < s->ntrans; i++) {
1998 if (s == s->trans[i].to) {
1999 ensure(loop == NULL, rtn->info);
2000 loop = s->trans[i].re;
2003 list_for_each(s1, rtn->states) {
2006 for (int t1=0; t1 < s1->ntrans; t1++) {
2007 if (s == s1->trans[t1].to) {
2008 for (int t2=0; t2 < s->ntrans; t2++) {
2009 struct state *s2 = s->trans[t2].to;
2012 collapse_trans(rtn, s1, s2,
2013 s1->trans[t1].re, loop,
2022 /* Find the overall regexp */
2023 struct regexp *result = NULL;
2024 for (int i=0; i < prod->start->ntrans; i++) {
2025 if (prod->start->trans[i].to == prod->end) {
2026 ensure(result == NULL, rtn->info);
2027 result = ref(prod->start->trans[i].re);
2035 static void propagate_type(struct lens *l, enum lens_type lt) {
2036 struct regexp **types = NULL;
2039 if (! l->recursive || ltype(l, lt) != NULL)
2044 r = ALLOC_N(types, l->nchildren);
2045 ERR_NOMEM(r < 0, l->info);
2046 for (int i=0; i < l->nchildren; i++) {
2047 propagate_type(l->children[i], lt);
2048 types[i] = ltype(l->children[i], lt);
2050 ltype(l, lt) = regexp_concat_n(l->info, l->nchildren, types);
2054 r = ALLOC_N(types, l->nchildren);
2055 ERR_NOMEM(r < 0, l->info);
2056 for (int i=0; i < l->nchildren; i++) {
2057 propagate_type(l->children[i], lt);
2058 types[i] = ltype(l->children[i], lt);
2060 ltype(l, lt) = regexp_union_n(l->info, l->nchildren, types);
2064 propagate_type(l->child, lt);
2066 l->atype = subtree_atype(l->info, l->child->ktype, l->child->vtype);
2068 l->ctype = ref(l->child->ctype);
2071 propagate_type(l->child, lt);
2072 ltype(l, lt) = regexp_iter(l->info, ltype(l->child, lt), 0, -1);
2075 propagate_type(l->child, lt);
2076 ltype(l, lt) = regexp_maybe(l->info, ltype(l->child, lt));
2082 propagate_type(l->child, lt);
2083 ltype(l, lt) = ref(ltype(l->child, lt));
2094 static struct value *typecheck(struct lens *l, int check);
2096 typedef struct value *typecheck_n_make(struct info *,
2097 struct lens *, struct lens *, int);
2099 static struct info *merge_info(struct info *i1, struct info *i2) {
2102 ERR_NOMEM(info == NULL, i1);
2104 info->filename = ref(i1->filename);
2105 info->first_line = i1->first_line;
2106 info->first_column = i1->first_column;
2107 info->last_line = i2->last_line;
2108 info->last_column = i2->last_column;
2109 info->error = i1->error;
2117 static struct value *typecheck_n(struct lens *l,
2118 typecheck_n_make *make, int check) {
2119 struct value *exn = NULL;
2120 struct lens *acc = NULL;
2122 ensure(l->tag == L_CONCAT || l->tag == L_UNION, l->info);
2123 for (int i=0; i < l->nchildren; i++) {
2124 exn = typecheck(l->children[i], check);
2128 acc = ref(l->children[0]);
2129 for (int i=1; i < l->nchildren; i++) {
2130 struct info *info = merge_info(acc->info, l->children[i]->info);
2131 ERR_NOMEM(info == NULL, acc->info);
2132 exn = (*make)(info, acc, ref(l->children[i]), check);
2135 ensure(exn->tag == V_LENS, l->info);
2136 acc = ref(exn->lens);
2139 l->value = acc->value;
2146 static struct value *typecheck(struct lens *l, int check) {
2147 struct value *exn = NULL;
2149 /* Nonrecursive lenses are typechecked at build time */
2155 exn = typecheck_n(l, lns_make_concat, check);
2158 exn = typecheck_n(l, lns_make_union, check);
2162 exn = typecheck(l->child, check);
2166 exn = typecheck_iter(l->info, l->child);
2167 if (exn == NULL && l->value)
2168 exn = make_exn_value(l->info, "Multiple stores in iteration");
2169 if (exn == NULL && l->key)
2170 exn = make_exn_value(l->info, "Multiple keys/labels in iteration");
2174 exn = typecheck_maybe(l->info, l->child);
2175 l->key = l->child->key;
2176 l->value = l->child->value;
2189 static struct value *rtn_approx(struct lens *rec, enum lens_type lt) {
2190 struct rtn *rtn = NULL;
2191 struct value *result = NULL;
2193 rtn = rtn_build(rec, lt);
2195 ltype(rec, lt) = rtn_reduce(rtn, rec);
2197 if (debugging("cf.approx"))
2198 rtn_dot(rtn, "50-reduce");
2200 propagate_type(rec->body, lt);
2201 ERR_BAIL(rec->info);
2206 if (debugging("cf.approx")) {
2207 printf("approx %s => ", lens_type_names[lt]);
2208 print_regexp(stdout, ltype(rec, lt));
2214 if (rtn->exn == NULL)
2215 result = rec->info->error->exn;
2217 result = ref(rtn->exn);
2221 static struct value *
2222 exn_multiple_epsilons(struct lens *lens,
2223 struct lens *l1, struct lens *l2) {
2225 struct value *exn = NULL;
2227 exn = make_exn_value(ref(lens->info),
2228 "more than one nullable branch in a union");
2229 fi = format_info(l1->info);
2230 exn_printf_line(exn, "First nullable lens: %s", fi);
2233 fi = format_info(l2->info);
2234 exn_printf_line(exn, "Second nullable lens: %s", fi);
2240 /* Update lens->ctype_nullable and return 1 if there was a change,
2241 * 0 if there was none */
2242 static int ctype_nullable(struct lens *lens, struct value **exn) {
2245 struct lens *null_lens = NULL;
2247 if (! lens->recursive)
2253 for (int i=0; i < lens->nchildren; i++) {
2254 if (ctype_nullable(lens->children[i], exn))
2256 if (! lens->children[i]->ctype_nullable)
2261 for (int i=0; i < lens->nchildren; i++) {
2262 if (ctype_nullable(lens->children[i], exn))
2264 if (lens->children[i]->ctype_nullable) {
2266 *exn = exn_multiple_epsilons(lens, null_lens,
2271 null_lens = lens->children[i];
2277 ret = ctype_nullable(lens->child, exn);
2278 nullable = lens->child->ctype_nullable;
2285 nullable = lens->body->ctype_nullable;
2293 if (nullable != lens->ctype_nullable) {
2295 lens->ctype_nullable = nullable;
2300 struct value *lns_check_rec(struct info *info,
2301 struct lens *body, struct lens *rec,
2303 /* The types in the order of approximation */
2304 static const enum lens_type types[] = { KTYPE, VTYPE, ATYPE };
2305 struct value *result = NULL;
2307 ensure(rec->tag == L_REC, info);
2308 ensure(rec->rec_internal, info);
2310 /* The user might have written down a regular lens with 'let rec' */
2311 if (! body->recursive) {
2312 result = make_lens_value(ref(body));
2313 ERR_NOMEM(result == NULL, info);
2317 /* To help memory management, we avoid the cycle inherent ina recursive
2318 * lens by using two instances of an L_REC lens. One is marked with
2319 * rec_internal, and used inside the body of the lens. The other is the
2320 * "toplevel" which receives external references.
2322 * The internal instance of the recursive lens is REC, the external one
2323 * is TOP, constructed below
2325 rec->body = body; /* REC does not own BODY */
2327 for (int i=0; i < ARRAY_CARDINALITY(types); i++) {
2328 result = rtn_approx(rec, types[i]);
2332 if (rec->atype == NULL) {
2333 result = make_exn_value(ref(rec->info),
2334 "recursive lens generates the empty language for its %s",
2335 rec->ctype == NULL ? "ctype" : "atype");
2339 rec->key = rec->body->key;
2340 rec->value = rec->body->value;
2341 rec->consumes_value = rec->body->consumes_value;
2343 while(ctype_nullable(rec->body, &result));
2346 rec->ctype_nullable = rec->body->ctype_nullable;
2348 result = typecheck(rec->body, check);
2352 result = lns_make_rec(ref(rec->info));
2353 struct lens *top = result->lens;
2354 for (int t=0; t < ntypes; t++)
2355 ltype(top, t) = ref(ltype(rec, t));
2356 top->value = rec->value;
2357 top->key = rec->key;
2358 top->consumes_value = rec->consumes_value;
2359 top->ctype_nullable = rec->ctype_nullable;
2360 top->body = ref(body);
2362 top->rec_internal = 0;
2365 top->jmt = jmt_build(top);
2370 if (result != NULL && result->tag != V_EXN)
2371 unref(result, value);
2373 result = info->error->exn;
2378 void dump_lens_tree(struct lens *lens){
2379 static int count = 0;
2382 fp = debug_fopen("lens_%02d_%s.dot", count++, ltag(lens));
2386 fprintf(fp, "digraph \"%s\" {\n", "lens");
2387 dump_lens(fp, lens);
2393 void dump_lens(FILE *out, struct lens *lens){
2397 fprintf(out, "\"%p\" [ shape = box, label = \"%s\\n", lens, ltag(lens));
2399 for (int t=0; t < ntypes; t++) {
2400 re = ltype(lens, t);
2403 fprintf(out, "%s=",lens_type_names[t]);
2404 print_regexp(out, re);
2405 fprintf(out, "\\n");
2408 fprintf(out, "recursive=%x\\n", lens->recursive);
2409 fprintf(out, "rec_internal=%x\\n", lens->rec_internal);
2410 fprintf(out, "consumes_value=%x\\n", lens->consumes_value);
2411 fprintf(out, "ctype_nullable=%x\\n", lens->ctype_nullable);
2412 fprintf(out, "\"];\n");
2429 for(i = 0; i<lens->nchildren;i++){
2430 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->children[i]);
2431 dump_lens(out, lens->children[i]);
2435 for(i = 0; i<lens->nchildren;i++){
2436 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->children[i]);
2437 dump_lens(out, lens->children[i]);
2441 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2442 dump_lens(out, lens->child);
2445 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2446 dump_lens(out, lens->child);
2450 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2451 dump_lens(out, lens->child);
2455 if (lens->rec_internal == 0){
2456 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2457 dump_lens(out, lens->body);
2461 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2462 dump_lens(out, lens->child);
2465 fprintf(out, "ERROR\n");
2473 * indent-tabs-mode: nil