4 * Copyright (C) 2007-2011 David Lutterkort
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * Author: David Lutterkort <dlutter@redhat.com>
31 /* This enum must be kept in sync with type_offs and ntypes */
33 CTYPE, ATYPE, KTYPE, VTYPE
36 static const int const type_offs[] = {
37 offsetof(struct lens, ctype),
38 offsetof(struct lens, atype),
39 offsetof(struct lens, ktype),
40 offsetof(struct lens, vtype)
42 static const int ntypes = sizeof(type_offs)/sizeof(type_offs[0]);
44 static const char *lens_type_names[] =
45 { "ctype", "atype", "ktype", "vtype" };
47 #define ltype(lns, t) *((struct regexp **) ((char *) lns + type_offs[t]))
49 static struct value * typecheck_union(struct info *,
50 struct lens *l1, struct lens *l2);
51 static struct value *typecheck_concat(struct info *,
52 struct lens *l1, struct lens *l2);
53 static struct value *typecheck_square(struct info *,
54 struct lens *l1, struct lens *l2);
55 static struct value *typecheck_iter(struct info *info, struct lens *l);
56 static struct value *typecheck_maybe(struct info *info, struct lens *l);
58 /* Lens names for pretty printing */
59 /* keep order in sync with enum type */
60 static const char *const tags[] = {
61 "del", "store", "value", "key", "label", "seq", "counter",
63 "subtree", "star", "maybe", "rec", "square"
66 #define ltag(lens) (tags[lens->tag - L_DEL])
68 static const struct string digits_string = {
69 .ref = REF_MAX, .str = (char *) "[0123456789]+"
71 static const struct string *const digits_pat = &digits_string;
73 char *format_lens(struct lens *l) {
74 char *inf = format_info(l->info);
77 xasprintf(&result, "%s[%s]%s", tags[l->tag - L_DEL], inf,
78 l->recursive ? "R" : "r");
83 #define BUG_LENS_TAG(lns) bug_lens_tag(lns, __FILE__, __LINE__)
85 static void bug_lens_tag(struct lens *lens, const char *file, int lineno) {
86 char *s = format_lens(lens);
88 if (lens != NULL && lens->info != NULL && lens->info->error != NULL) {
89 bug_on(lens->info->error, file, lineno, "Unexpected lens tag %s", s);
91 /* We are really screwed */
98 /* Construct a finite automaton from REGEXP and return it in *FA.
100 * Return NULL if REGEXP is valid, if the regexp REGEXP has syntax errors,
101 * return an exception.
103 static struct value *str_to_fa(struct info *info, const char *pattern,
104 struct fa **fa, int nocase) {
106 struct value *exn = NULL;
108 char *re_str = NULL, *re_err = NULL;
111 error = fa_compile(pattern, strlen(pattern), fa);
112 if (error == REG_NOERROR) {
114 error = fa_nocase(*fa);
115 ERR_NOMEM(error < 0, info);
120 re_str = escape(pattern, -1, RX_ESCAPES);
121 ERR_NOMEM(re_str == NULL, info);
123 exn = make_exn_value(info, "Invalid regular expression /%s/", re_str);
125 re_err_len = regerror(error, NULL, NULL, 0);
126 error = ALLOC_N(re_err, re_err_len);
127 ERR_NOMEM(error < 0, info);
129 regerror(error, NULL, re_err, re_err_len);
130 exn_printf_line(exn, "%s", re_err);
139 exn = info->error->exn;
143 static struct value *regexp_to_fa(struct regexp *regexp, struct fa **fa) {
144 return str_to_fa(regexp->info, regexp->pattern->str, fa, regexp->nocase);
147 static struct lens *make_lens(enum lens_tag tag, struct info *info) {
156 static struct lens *make_lens_unop(enum lens_tag tag, struct info *info,
157 struct lens *child) {
158 struct lens *lens = make_lens(tag, info);
160 lens->value = child->value;
161 lens->key = child->key;
165 typedef struct regexp *regexp_combinator(struct info *, int, struct regexp **);
167 static struct lens *make_lens_binop(enum lens_tag tag, struct info *info,
168 struct lens *l1, struct lens *l2,
169 regexp_combinator *combinator) {
170 struct lens *lens = make_lens(tag, info);
171 int n1 = (l1->tag == tag) ? l1->nchildren : 1;
172 struct regexp **types = NULL;
177 lens->nchildren = n1;
178 lens->nchildren += (l2->tag == tag) ? l2->nchildren : 1;
180 lens->recursive = l1->recursive || l2->recursive;
181 lens->rec_internal = l1->rec_internal || l2->rec_internal;
183 if (ALLOC_N(lens->children, lens->nchildren) < 0) {
188 if (l1->tag == tag) {
189 for (int i=0; i < l1->nchildren; i++)
190 lens->children[i] = ref(l1->children[i]);
193 lens->children[0] = l1;
196 if (l2->tag == tag) {
197 for (int i=0; i < l2->nchildren; i++)
198 lens->children[n1 + i] = ref(l2->children[i]);
201 lens->children[n1] = l2;
204 for (int i=0; i < lens->nchildren; i++) {
205 lens->value = lens->value || lens->children[i]->value;
206 lens->key = lens->key || lens->children[i]->key;
209 if (ALLOC_N(types, lens->nchildren) < 0)
212 if (! lens->rec_internal) {
213 /* Inside a recursive lens, we assign types with lns_check_rec
214 * once we know the entire lens */
215 for (int t=0; t < ntypes; t++) {
216 if (lens->recursive && t == CTYPE)
218 for (int i=0; i < lens->nchildren; i++)
219 types[i] = ltype(lens->children[i], t);
220 ltype(lens, t) = (*combinator)(info, lens->nchildren, types);
225 for (int i=0; i < lens->nchildren; i++)
226 ensure(tag != lens->children[i]->tag, lens->info);
235 static struct value *make_lens_value(struct lens *lens) {
237 v = make_value(V_LENS, ref(lens->info));
242 struct value *lns_make_union(struct info *info,
243 struct lens *l1, struct lens *l2, int check) {
244 struct lens *lens = NULL;
245 int consumes_value = l1->consumes_value && l2->consumes_value;
246 int recursive = l1->recursive || l2->recursive;
247 int ctype_nullable = l1->ctype_nullable || l2->ctype_nullable;
250 struct value *exn = typecheck_union(info, l1, l2);
255 lens = make_lens_binop(L_UNION, info, l1, l2, regexp_union_n);
256 lens->consumes_value = consumes_value;
258 lens->ctype_nullable = ctype_nullable;
259 return make_lens_value(lens);
262 struct value *lns_make_concat(struct info *info,
263 struct lens *l1, struct lens *l2, int check) {
264 struct lens *lens = NULL;
265 int consumes_value = l1->consumes_value || l2->consumes_value;
266 int recursive = l1->recursive || l2->recursive;
267 int ctype_nullable = l1->ctype_nullable && l2->ctype_nullable;
270 struct value *exn = typecheck_concat(info, l1, l2);
274 if (l1->value && l2->value) {
275 return make_exn_value(info, "Multiple stores in concat");
277 if (l1->key && l2->key) {
278 return make_exn_value(info, "Multiple keys/labels in concat");
281 lens = make_lens_binop(L_CONCAT, info, l1, l2, regexp_concat_n);
282 lens->consumes_value = consumes_value;
284 lens->ctype_nullable = ctype_nullable;
285 return make_lens_value(lens);
288 static struct regexp *subtree_atype(struct info *info,
289 struct regexp *ktype,
290 struct regexp *vtype) {
291 const char *kpat = (ktype == NULL) ? ENC_NULL : ktype->pattern->str;
292 const char *vpat = (vtype == NULL) ? ENC_NULL : vtype->pattern->str;
294 struct regexp *result = NULL;
295 char *ks = NULL, *vs = NULL;
298 if (ktype != NULL && vtype != NULL && ktype->nocase != vtype->nocase) {
299 ks = regexp_expand_nocase(ktype);
300 vs = regexp_expand_nocase(vtype);
301 ERR_NOMEM(ks == NULL || vs == NULL, info);
302 if (asprintf(&pat, "(%s)%s(%s)%s", ks, ENC_EQ, vs, ENC_SLASH) < 0)
303 ERR_NOMEM(true, info);
306 if (asprintf(&pat, "(%s)%s(%s)%s", kpat, ENC_EQ, vpat, ENC_SLASH) < 0)
307 ERR_NOMEM(pat == NULL, info);
311 nocase = ktype->nocase;
312 else if (vtype != NULL)
313 nocase = vtype->nocase;
315 result = make_regexp(info, pat, nocase);
323 * A subtree lens l1 = [ l ]
325 * Types are assigned as follows:
327 * l1->ctype = l->ctype
328 * l1->atype = encode(l->ktype, l->vtype)
332 struct value *lns_make_subtree(struct info *info, struct lens *l) {
335 lens = make_lens_unop(L_SUBTREE, info, l);
336 lens->ctype = ref(l->ctype);
338 lens->atype = subtree_atype(info, l->ktype, l->vtype);
339 lens->value = lens->key = 0;
340 lens->recursive = l->recursive;
341 lens->rec_internal = l->rec_internal;
343 lens->ctype_nullable = l->ctype_nullable;
344 return make_lens_value(lens);
347 struct value *lns_make_star(struct info *info, struct lens *l, int check) {
351 struct value *exn = typecheck_iter(info, l);
356 return make_exn_value(info, "Multiple stores in iteration");
359 return make_exn_value(info, "Multiple keys/labels in iteration");
362 lens = make_lens_unop(L_STAR, info, l);
363 for (int t = 0; t < ntypes; t++) {
364 ltype(lens, t) = regexp_iter(info, ltype(l, t), 0, -1);
366 lens->recursive = l->recursive;
367 lens->rec_internal = l->rec_internal;
368 lens->ctype_nullable = 1;
369 return make_lens_value(lens);
372 struct value *lns_make_plus(struct info *info, struct lens *l, int check) {
373 struct value *star, *conc;
375 star = lns_make_star(info, l, check);
379 conc = lns_make_concat(ref(info), ref(l), ref(star->lens), check);
384 struct value *lns_make_maybe(struct info *info, struct lens *l, int check) {
388 struct value *exn = typecheck_maybe(info, l);
392 lens = make_lens_unop(L_MAYBE, info, l);
393 for (int t=0; t < ntypes; t++)
394 ltype(lens, t) = regexp_maybe(info, ltype(l, t));
395 lens->value = l->value;
397 lens->recursive = l->recursive;
398 lens->rec_internal = l->rec_internal;
399 lens->ctype_nullable = 1;
400 return make_lens_value(lens);
403 /* The ctype of SQR is a regular approximation of the true ctype of SQR
404 * at this point. In some situations, for example in processing quoted
405 * strings this leads to false typecheck errors; to lower the chances
406 * of these, we try to construct the precise ctype of SQR if the
407 * language of L1 is finite (and has a small number of words)
409 static void square_precise_type(struct info *info,
412 struct regexp *body) {
416 struct fa *fa = NULL;
417 struct value *exn = NULL;
418 struct regexp **u = NULL, *c[3], *w = NULL;
420 exn = str_to_fa(info, left->pattern->str, &fa, left->nocase);
424 nwords = fa_enumerate(fa, 10, &words); /* The limit of 10 is arbitrary */
428 r = ALLOC_N(u, nwords);
429 ERR_NOMEM(r < 0, info);
432 for (int i=0; i < nwords; i++) {
433 w = make_regexp_literal(left->info, words[i]);
434 ERR_NOMEM(w == NULL, info);
435 w->nocase = left->nocase;
438 u[i] = regexp_concat_n(info, 3, c);
441 ERR_NOMEM(u[i] == NULL, info);
443 w = regexp_union_n(info, nwords, u);
452 for (int i=0; i < nwords; i++) {
463 /* Build a square lens as
464 * left . body . right
465 * where left and right accepts the same language and
466 * captured strings must match. The inability to express this with other
467 * lenses makes the square primitive necessary.
469 struct value * lns_make_square(struct info *info, struct lens *l1,
470 struct lens *l2, struct lens *l3, int check) {
471 struct value *cnt1 = NULL, *cnt2 = NULL, *res = NULL;
472 struct lens *sqr = NULL;
474 /* supported types: L_KEY . body . L_DEL or L_DEL . body . L_DEL */
475 if (l3->tag != L_DEL || (l1->tag != L_DEL && l1->tag != L_KEY))
476 return make_exn_value(info, "Supported types: (key lns del) or (del lns del)");
478 res = typecheck_square(info, l1, l3);
482 res = lns_make_concat(ref(info), ref(l1), ref(l2), check);
486 res = lns_make_concat(ref(info), ref(cnt1->lens), ref(l3), check);
491 sqr = make_lens_unop(L_SQUARE, ref(info), ref(cnt2->lens));
492 ERR_NOMEM(sqr == NULL, info);
494 for (int t=0; t < ntypes; t++)
495 ltype(sqr, t) = ref(ltype(cnt2->lens, t));
497 square_precise_type(info, &(sqr->ctype), l1->ctype, l2->ctype);
499 sqr->recursive = cnt2->lens->recursive;
500 sqr->rec_internal = cnt2->lens->rec_internal;
501 sqr->consumes_value = cnt2->lens->consumes_value;
503 res = make_lens_value(sqr);
504 ERR_NOMEM(res == NULL, info);
522 static struct regexp *make_regexp_from_string(struct info *info,
523 struct string *string) {
528 r->pattern = ref(string);
534 static struct regexp *restrict_regexp(struct regexp *r) {
536 struct regexp *result = NULL;
540 ret = fa_restrict_alphabet(r->pattern->str, strlen(r->pattern->str),
542 RESERVED_FROM, RESERVED_TO);
543 ERR_NOMEM(ret == REG_ESPACE || ret < 0, r->info);
544 BUG_ON(ret != 0, r->info, NULL);
545 ensure(nre_len == strlen(nre), r->info);
547 ret = regexp_c_locale(&nre, &nre_len);
548 ERR_NOMEM(ret < 0, r->info);
550 result = make_regexp(r->info, nre, r->nocase);
552 BUG_ON(regexp_compile(result) != 0, r->info,
553 "Could not compile restricted regexp");
558 unref(result, regexp);
562 struct value *lns_make_prim(enum lens_tag tag, struct info *info,
563 struct regexp *regexp, struct string *string) {
564 struct lens *lens = NULL;
565 struct value *exn = NULL;
566 struct fa *fa_slash = NULL;
567 struct fa *fa_key = NULL;
568 struct fa *fa_isect = NULL;
572 exn = str_to_fa(info, "(.|\n)*/(.|\n)*", &fa_slash, regexp->nocase);
576 exn = regexp_to_fa(regexp, &fa_key);
580 fa_isect = fa_intersect(fa_slash, fa_key);
581 if (! fa_is_basic(fa_isect, FA_EMPTY)) {
582 exn = make_exn_value(info,
583 "The key regexp /%s/ matches a '/'",
584 regexp->pattern->str);
590 fa_isect = fa_key = fa_slash = NULL;
591 } else if (tag == L_LABEL) {
592 if (strchr(string->str, SEP) != NULL) {
593 exn = make_exn_value(info,
594 "The label string \"%s\" contains a '/'",
598 } else if (tag == L_DEL && string != NULL) {
600 const char *dflt = string->str;
601 cnt = regexp_match(regexp, dflt, strlen(dflt), 0, NULL);
602 if (cnt != strlen(dflt)) {
603 char *s = escape(dflt, -1, RX_ESCAPES);
604 char *r = regexp_escape(regexp);
605 exn = make_exn_value(info,
606 "del: the default value '%s' does not match /%s/",
614 /* Build the actual lens */
615 lens = make_lens(tag, info);
616 lens->regexp = regexp;
617 lens->string = string;
618 lens->key = (tag == L_KEY || tag == L_LABEL || tag == L_SEQ);
619 lens->value = (tag == L_STORE || tag == L_VALUE);
620 lens->consumes_value = (tag == L_STORE || tag == L_VALUE);
621 lens->atype = regexp_make_empty(info);
623 if (tag == L_DEL || tag == L_STORE || tag == L_KEY) {
624 lens->ctype = ref(regexp);
625 lens->ctype_nullable = regexp_matches_empty(lens->ctype);
626 } else if (tag == L_LABEL || tag == L_VALUE
627 || tag == L_SEQ || tag == L_COUNTER) {
628 lens->ctype = regexp_make_empty(info);
629 lens->ctype_nullable = 1;
639 make_regexp_from_string(info, (struct string *) digits_pat);
640 if (lens->ktype == NULL)
642 } else if (tag == L_KEY) {
643 lens->ktype = restrict_regexp(lens->regexp);
644 } else if (tag == L_LABEL) {
645 lens->ktype = make_regexp_literal(info, lens->string->str);
646 if (lens->ktype == NULL)
651 if (tag == L_STORE) {
652 lens->vtype = restrict_regexp(lens->regexp);
653 } else if (tag == L_VALUE) {
654 lens->vtype = make_regexp_literal(info, lens->string->str);
657 return make_lens_value(lens);
666 * Typechecking of lenses
668 static struct value *disjoint_check(struct info *info, bool is_get,
669 struct regexp *r1, struct regexp *r2) {
670 struct fa *fa1 = NULL;
671 struct fa *fa2 = NULL;
672 struct fa *fa = NULL;
673 struct value *exn = NULL;
674 const char *const msg = is_get ? "union.get" : "tree union.put";
676 if (r1 == NULL || r2 == NULL)
679 exn = regexp_to_fa(r1, &fa1);
683 exn = regexp_to_fa(r2, &fa2);
687 fa = fa_intersect(fa1, fa2);
688 if (! fa_is_basic(fa, FA_EMPTY)) {
691 fa_example(fa, &xmpl, &xmpl_len);
693 char *fmt = enc_format(xmpl, xmpl_len);
699 exn = make_exn_value(ref(info),
700 "overlapping lenses in %s", msg);
703 exn_printf_line(exn, "Example matched by both: '%s'", xmpl);
705 exn_printf_line(exn, "Example matched by both: %s", xmpl);
717 static struct value *typecheck_union(struct info *info,
718 struct lens *l1, struct lens *l2) {
719 struct value *exn = NULL;
721 exn = disjoint_check(info, true, l1->ctype, l2->ctype);
723 exn = disjoint_check(info, false, l1->atype, l2->atype);
726 char *fi = format_info(l1->info);
727 exn_printf_line(exn, "First lens: %s", fi);
730 fi = format_info(l2->info);
731 exn_printf_line(exn, "Second lens: %s", fi);
737 static struct value *
738 ambig_check(struct info *info, struct fa *fa1, struct fa *fa2,
739 enum lens_type typ, struct lens *l1, struct lens *l2,
740 const char *msg, bool iterated) {
743 struct value *exn = NULL;
746 r = fa_ambig_example(fa1, fa2, &upv, &upv_len, &pv, &v);
748 exn = make_exn_value(ref(info), "not enough memory");
752 ERR_REPORT(info, AUG_ENOMEM, NULL);
753 return info->error->exn;
758 char *e_u, *e_up, *e_upv, *e_pv, *e_v;
762 e_u = enc_format(upv, pv - upv);
763 e_up = enc_format(upv, v - upv);
764 e_upv = enc_format(upv, upv_len);
765 e_pv = enc_format(pv, strlen(pv));
766 e_v = enc_format(v, strlen(v));
767 lns_format_atype(l1, &s1);
768 lns_format_atype(l2, &s2);
770 e_u = escape(upv, pv - upv, RX_ESCAPES);
771 e_up = escape(upv, v - upv, RX_ESCAPES);
772 e_upv = escape(upv, -1, RX_ESCAPES);
773 e_pv = escape(pv, -1, RX_ESCAPES);
774 e_v = escape(v, -1, RX_ESCAPES);
775 s1 = regexp_escape(ltype(l1, typ));
776 s2 = regexp_escape(ltype(l2, typ));
778 exn = make_exn_value(ref(info), "%s", msg);
780 exn_printf_line(exn, " Iterated regexp: /%s/", s1);
782 exn_printf_line(exn, " First regexp: /%s/", s1);
783 exn_printf_line(exn, " Second regexp: /%s/", s2);
785 exn_printf_line(exn, " '%s' can be split into", e_upv);
786 exn_printf_line(exn, " '%s|=|%s'\n", e_u, e_pv);
787 exn_printf_line(exn, " and");
788 exn_printf_line(exn, " '%s|=|%s'\n", e_up, e_v);
801 static struct value *
802 ambig_concat_check(struct info *info, const char *msg,
803 enum lens_type typ, struct lens *l1, struct lens *l2) {
804 struct fa *fa1 = NULL;
805 struct fa *fa2 = NULL;
806 struct value *result = NULL;
807 struct regexp *r1 = ltype(l1, typ);
808 struct regexp *r2 = ltype(l2, typ);
810 if (r1 == NULL || r2 == NULL)
813 result = regexp_to_fa(r1, &fa1);
817 result = regexp_to_fa(r2, &fa2);
821 result = ambig_check(info, fa1, fa2, typ, l1, l2, msg, false);
828 static struct value *typecheck_concat(struct info *info,
829 struct lens *l1, struct lens *l2) {
830 struct value *result = NULL;
832 result = ambig_concat_check(info, "ambiguous concatenation",
834 if (result == NULL) {
835 result = ambig_concat_check(info, "ambiguous tree concatenation",
838 if (result != NULL) {
839 char *fi = format_info(l1->info);
840 exn_printf_line(result, "First lens: %s", fi);
842 fi = format_info(l2->info);
843 exn_printf_line(result, "Second lens: %s", fi);
849 static struct value *make_exn_square(struct info *info, struct lens *l1,
850 struct lens *l2, const char *msg) {
853 struct value *exn = make_exn_value(ref(info), "%s",
854 "Inconsistency in lens square");
855 exn_printf_line(exn, "%s", msg);
856 fi = format_info(l1->info);
857 exn_printf_line(exn, "Left lens: %s", fi);
859 fi = format_info(l2->info);
860 exn_printf_line(exn, "Right lens: %s", fi);
865 static struct value *typecheck_square(struct info *info, struct lens *l1,
868 struct value *exn = NULL;
869 struct fa *fa1 = NULL, *fa2 = NULL;
870 struct regexp *r1 = ltype(l1, CTYPE);
871 struct regexp *r2 = ltype(l2, CTYPE);
873 if (r1 == NULL || r2 == NULL)
876 exn = regexp_to_fa(r1, &fa1);
880 exn = regexp_to_fa(r2, &fa2);
884 r = fa_equals(fa1, fa2);
887 exn = make_exn_value(ref(info), "not enough memory");
891 ERR_REPORT(info, AUG_ENOMEM, NULL);
892 return info->error->exn;;
897 exn = make_exn_square(info, l1, l2,
898 "Left and right lenses must accept the same language");
902 /* check del create consistency */
903 if (l1->tag == L_DEL && l2->tag == L_DEL) {
904 if (!STREQ(l1->string->str, l2->string->str)) {
905 exn = make_exn_square(info, l1, l2,
906 "Left and right lenses must have the same default value");
917 static struct value *
918 ambig_iter_check(struct info *info, const char *msg,
919 enum lens_type typ, struct lens *l) {
920 struct fa *fas = NULL, *fa = NULL;
921 struct value *result = NULL;
922 struct regexp *r = ltype(l, typ);
927 result = regexp_to_fa(r, &fa);
931 fas = fa_iter(fa, 0, -1);
933 result = ambig_check(info, fa, fas, typ, l, l, msg, true);
941 static struct value *typecheck_iter(struct info *info, struct lens *l) {
942 struct value *result = NULL;
944 result = ambig_iter_check(info, "ambiguous iteration", CTYPE, l);
945 if (result == NULL) {
946 result = ambig_iter_check(info, "ambiguous tree iteration", ATYPE, l);
948 if (result != NULL) {
949 char *fi = format_info(l->info);
950 exn_printf_line(result, "Iterated lens: %s", fi);
956 static struct value *typecheck_maybe(struct info *info, struct lens *l) {
957 /* Check (r)? as (<e>|r) where <e> is the empty language */
958 struct value *exn = NULL;
960 if (l->ctype != NULL && regexp_matches_empty(l->ctype)) {
961 exn = make_exn_value(ref(info),
962 "illegal optional expression: /%s/ matches the empty word",
963 l->ctype->pattern->str);
966 /* Typecheck the put direction; the check passes if
967 (1) the atype does not match the empty string, because we can tell
968 from looking at tree nodes whether L should be applied or not
969 (2) L handles a value; with that, we know whether to apply L or not
970 depending on whether the current node has a non NULL value or not
972 if (exn == NULL && ! l->consumes_value) {
973 if (l->atype != NULL && regexp_matches_empty(l->atype)) {
974 exn = make_exn_value(ref(info),
975 "optional expression matches the empty tree but does not consume a value");
981 void free_lens(struct lens *lens) {
984 ensure(lens->ref == 0, lens->info);
986 if (debugging("lenses"))
987 dump_lens_tree(lens);
990 unref(lens->regexp, regexp);
991 unref(lens->string, string);
995 unref(lens->regexp, regexp);
1001 unref(lens->string, string);
1007 unref(lens->child, lens);
1011 for (int i=0; i < lens->nchildren; i++)
1012 unref(lens->children[i], lens);
1013 free(lens->children);
1016 if (!lens->rec_internal) {
1017 unref(lens->body, lens);
1025 for (int t=0; t < ntypes; t++)
1026 unref(ltype(lens, t), regexp);
1028 unref(lens->info, info);
1029 jmt_free(lens->jmt);
1035 void lens_release(struct lens *lens) {
1039 for (int t=0; t < ntypes; t++)
1040 regexp_release(ltype(lens, t));
1042 if (lens->tag == L_KEY || lens->tag == L_STORE)
1043 regexp_release(lens->regexp);
1045 if (lens->tag == L_SUBTREE || lens->tag == L_STAR
1046 || lens->tag == L_MAYBE || lens->tag == L_SQUARE) {
1047 lens_release(lens->child);
1050 if (lens->tag == L_UNION || lens->tag == L_CONCAT) {
1051 for (int i=0; i < lens->nchildren; i++) {
1052 lens_release(lens->children[i]);
1056 jmt_free(lens->jmt);
1061 * Encoding of tree levels
1063 char *enc_format(const char *e, size_t len) {
1065 char *result = NULL, *r;
1068 while (*k && k - e < len) {
1069 char *eq, *slash, *v;
1070 eq = strchr(k, ENC_EQ_CH);
1072 slash = strchr(eq, ENC_SLASH_CH);
1073 assert(slash != NULL);
1076 size += 6; /* Surrounding braces */
1078 size += 1 + (eq - k) + 1;
1080 size += 4 + (slash - v) + 1;
1083 if (ALLOC_N(result, size + 1) < 0)
1088 while (*k && k - e < len) {
1089 char *eq, *slash, *v;
1090 eq = strchr(k, ENC_EQ_CH);
1091 slash = strchr(eq, ENC_SLASH_CH);
1092 assert(eq != NULL && slash != NULL);
1095 r = stpcpy(r, " { ");
1097 r = stpcpy(r, "\"");
1098 r = stpncpy(r, k, eq - k);
1099 r = stpcpy(r, "\"");
1102 r = stpcpy (r, " = \"");
1103 r = stpncpy(r, v, slash - v);
1104 r = stpcpy(r, "\"");
1106 r = stpcpy(r, " }");
1112 static int lns_format_subtree_atype(struct lens *l, char **buf) {
1113 char *k = NULL, *v = NULL;
1114 const struct regexp *ktype = l->child->ktype;
1115 const struct regexp *vtype = l->child->vtype;
1118 if (ktype != NULL) {
1119 k = regexp_escape(ktype);
1123 if (vtype != NULL) {
1124 v = regexp_escape(vtype);
1128 r = xasprintf(buf, "{ = /%s/ }", k, v);
1130 r = xasprintf(buf, "{ /%s/ = /%s/ }", k, v);
1133 r = xasprintf(buf, "{ }", k);
1135 r = xasprintf(buf, "{ /%s/ }", k);
1147 static int lns_format_rep_atype(struct lens *l, char **buf, char quant) {
1151 r = lns_format_atype(l->child, &a);
1154 if (strlen(a) == 0) {
1161 if (l->child->tag == L_CONCAT || l->child->tag == L_UNION)
1162 r = xasprintf(buf, "(%s)%c", a, quant);
1164 r = xasprintf(buf, "%s%c", a, quant);
1175 static int lns_format_concat_atype(struct lens *l, char **buf) {
1176 char **c = NULL, *s = NULL, *p;
1178 size_t len = 0, nconc = 0;
1180 if (ALLOC_N(c, l->nchildren) < 0)
1183 for (int i=0; i < l->nchildren; i++) {
1184 r = lns_format_atype(l->children[i], c+i);
1187 len += strlen(c[i]) + 2;
1188 if (strlen(c[i]) > 0)
1190 if (l->children[i]->tag == L_UNION)
1194 if (ALLOC_N(s, len+1) < 0)
1197 for (int i=0; i < l->nchildren; i++) {
1198 bool needs_parens = nconc > 1 && l->children[i]->tag == L_UNION;
1199 if (strlen(c[i]) == 0)
1203 p = stpcpy(p, c[i]);
1213 for (int i=0; i < l->nchildren; i++)
1220 static int lns_format_union_atype(struct lens *l, char **buf) {
1221 char **c = NULL, *s = NULL, *p;
1225 if (ALLOC_N(c, l->nchildren) < 0)
1228 for (int i=0; i < l->nchildren; i++) {
1229 r = lns_format_atype(l->children[i], c+i);
1232 len += strlen(c[i]) + 2;
1234 len += l->nchildren - 1;
1236 if (ALLOC_N(s, len+1) < 0)
1240 for (int i=0; i < l->nchildren; i++) {
1242 p = stpcpy(p, " | ");
1243 if (strlen(c[i]) == 0)
1244 p = stpcpy(p, "()");
1246 p = stpcpy(p, c[i]);
1253 for (int i=0; i < l->nchildren; i++)
1260 static int lns_format_rec_atype(struct lens *l, char **buf) {
1263 if (l->rec_internal) {
1264 *buf = strdup("<<rec>>");
1265 return (*buf == NULL) ? -1 : 0;
1269 r = lns_format_atype(l->body, &c);
1272 r = xasprintf(buf, "<<rec:%s>>", c);
1274 return (r < 0) ? -1 : 0;
1277 int lns_format_atype(struct lens *l, char **buf) {
1289 return (*buf == NULL) ? -1 : 0;
1292 return lns_format_subtree_atype(l, buf);
1295 return lns_format_rep_atype(l, buf, '*');
1298 return lns_format_rep_atype(l, buf, '?');
1301 return lns_format_concat_atype(l, buf);
1304 return lns_format_union_atype(l, buf);
1307 return lns_format_rec_atype(l, buf);
1310 return lns_format_concat_atype(l->child, buf);
1322 struct value *lns_make_rec(struct info *info) {
1323 struct lens *l = make_lens(L_REC, info);
1325 l->rec_internal = 1;
1327 return make_lens_value(l);
1330 /* Transform a recursive lens into a recursive transition network
1332 * First, we transform the lens into context free grammar, considering any
1333 * nonrecursive lens as a terminal
1335 * cfg: lens -> nonterminal -> production list
1337 * cfg(primitive, N) -> N := regexp(primitive)
1338 * cfg(l1 . l2, N) -> N := N1 . N2 + cfg(l1, N1) + cfg(l2, N2)
1339 * cfg(l1 | l2, N) -> N := N1 | N2 + cfg(l1, N1) + cfg(l2, N2)
1340 * cfg(l*, N) -> N := N . N' | eps + cfg(l, N')
1341 * cfg([ l ], N) -> N := N' + cfg(l, N')
1343 * We use the lenses as nonterminals themselves; this also means that our
1344 * productions are normalized such that the RHS is either a terminal
1345 * (regexp) or entirely consists of nonterminals
1347 * In a few places, we need to know that a nonterminal corresponds to a
1348 * subtree combinator ([ l ]); this is the main reason that the rule (cfg[
1349 * l ], N) introduces a useless production N := N'.
1351 * Computing the types for a recursive lens r is (fairly) straightforward,
1352 * given the above grammar, which we convert to an automaton following
1353 * http://arxiv.org/abs/cs/9910022; the only complication arises from the
1354 * subtree combinator, since it can be used in recursive lenses to
1355 * construct trees of arbitrary depth, but we need to approximate the types
1356 * of r in a way that fits with our top-down tree automaton in put.c.
1358 * To handle subtree combinators, remember that the type rules for a lens
1364 * m.atype = enc(l.ktype, l.vtype)
1365 * ( enc is a function regexp -> regexp -> regexp)
1367 * We compute types for r by modifying its automaton according to
1368 * Nederhof's paper and reducing it to a regular expression of lenses. This
1369 * has to happen in the following steps:
1370 * r.ktype : approximate by using [ .. ].ktype = NULL
1371 * r.vtype : same as r.ktype
1372 * r.ctype : approximate by treating [ l ] as l
1373 * r.atype : approximate by using r.ktype and r.vtype from above
1374 * in lens expressions [ f(r) ]
1377 /* Transitions go to a state and are labeled with a lens. For epsilon
1378 * transitions, lens may be NULL. When lens is a simple (nonrecursive
1379 * lens), PROD will be NULL. When we modify the automaton to splice
1380 * nonterminals in, we remember the production for the nonterminal in PROD.
1389 struct state *next; /* Linked list for memory management */
1391 struct trans *trans;
1394 /* Productions for lens LENS. Start state START and end state END. If we
1395 start with START, END is the only accepting state. */
1398 struct state *start;
1402 /* A recursive transition network used to compute regular approximations
1408 struct state *states; /* Linked list through next of all states in all
1409 prods; the states for each production are on
1410 the part of the list from prod->start to
1413 enum lens_type lens_type;
1414 unsigned int check : 1;
1417 #define RTN_BAIL(rtn) if ((rtn)->exn != NULL || \
1418 (rtn)->info->error->code != AUG_NOERROR) \
1421 static void free_prod(struct prod *prod) {
1424 unref(prod->lens, lens);
1428 static void free_rtn(struct rtn *rtn) {
1431 for (int i=0; i < rtn->nprod; i++)
1432 free_prod(rtn->prod[i]);
1434 list_for_each(s, rtn->states) {
1435 for (int i=0; i < s->ntrans; i++) {
1436 unref(s->trans[i].lens, lens);
1437 unref(s->trans[i].re, regexp);
1441 list_free(rtn->states);
1442 unref(rtn->info, info);
1443 unref(rtn->exn, value);
1447 static struct state *add_state(struct prod *prod) {
1448 struct state *result = NULL;
1452 ERR_NOMEM(r < 0, prod->lens->info);
1454 list_cons(prod->start->next, result);
1459 static struct trans *add_trans(struct rtn *rtn, struct state *state,
1460 struct state *to, struct lens *l) {
1462 struct trans *result = NULL;
1464 for (int i=0; i < state->ntrans; i++)
1465 if (state->trans[i].to == to && state->trans[i].lens == l)
1466 return state->trans + i;
1468 r = REALLOC_N(state->trans, state->ntrans+1);
1469 ERR_NOMEM(r < 0, rtn->info);
1471 result = state->trans + state->ntrans;
1477 result->lens = ref(l);
1478 result->re = ref(ltype(l, rtn->lens_type));
1484 static struct prod *make_prod(struct rtn *rtn, struct lens *l) {
1485 struct prod *result = NULL;
1489 ERR_NOMEM(r < 0, l->info);
1491 result->lens = ref(l);
1492 r = ALLOC(result->start);
1493 ERR_NOMEM(r < 0, l->info);
1495 result->end = add_state(result);
1498 result->end->next = rtn->states;
1499 rtn->states = result->start;
1507 static struct prod *prod_for_lens(struct rtn *rtn, struct lens *l) {
1510 for (int i=0; i < rtn->nprod; i++) {
1511 if (rtn->prod[i]->lens == l)
1512 return rtn->prod[i];
1517 static void rtn_dot(struct rtn *rtn, const char *stage) {
1521 fp = debug_fopen("rtn_%s_%s.dot", stage, lens_type_names[rtn->lens_type]);
1525 fprintf(fp, "digraph \"l1\" {\n rankdir=LR;\n");
1526 list_for_each(s, rtn->states) {
1528 for (int p=0; p < rtn->nprod; p++) {
1529 if (s == rtn->prod[p]->start) {
1530 r = xasprintf(&label, "s%d", p);
1531 } else if (s == rtn->prod[p]->end) {
1532 r = xasprintf(&label, "e%d", p);
1534 ERR_NOMEM(r < 0, rtn->info);
1536 if (label == NULL) {
1537 r = xasprintf(&label, "%p", s);
1538 ERR_NOMEM(r < 0, rtn->info);
1540 fprintf(fp, " n%p [label = \"%s\"];\n", s, label == NULL ? "" : label);
1542 for (int i=0; i < s->ntrans; i++) {
1543 fprintf(fp, " n%p -> n%p", s, s->trans[i].to);
1544 if (s->trans[i].re != NULL) {
1545 label = regexp_escape(s->trans[i].re);
1546 for (char *t = label; *t; t++)
1549 fprintf(fp, " [ label = \"%s\" ]", label);
1560 /* Add transitions to RTN corresponding to cfg(l, N) */
1561 static void rtn_rules(struct rtn *rtn, struct lens *l) {
1565 struct prod *prod = prod_for_lens(rtn, l);
1569 int r = REALLOC_N(rtn->prod, rtn->nprod+1);
1570 ERR_NOMEM(r < 0, l->info);
1572 prod = make_prod(rtn, l);
1573 rtn->prod[rtn->nprod] = prod;
1577 struct state *start = prod->start;
1581 /* cfg(l1|..|ln, N) -> N := N1 | N2 | ... | Nn */
1582 for (int i=0; i < l->nchildren; i++) {
1583 add_trans(rtn, start, prod->end, l->children[i]);
1585 rtn_rules(rtn, l->children[i]);
1590 /* cfg(l1 . l2 ... ln, N) -> N := N1 . N2 ... Nn */
1591 for (int i=0; i < l->nchildren-1; i++) {
1592 struct state *s = add_state(prod);
1594 add_trans(rtn, start, s, l->children[i]);
1597 rtn_rules(rtn, l->children[i]);
1601 struct lens *c = l->children[l->nchildren - 1];
1602 add_trans(rtn, start, prod->end, c);
1609 /* cfg(l*, N) -> N := N . N' | eps */
1610 struct state *s = add_state(prod);
1612 add_trans(rtn, start, s, l);
1614 add_trans(rtn, s, prod->end, l->child);
1616 add_trans(rtn, start, prod->end, NULL);
1618 rtn_rules(rtn, l->child);
1623 switch (rtn->lens_type) {
1626 /* cfg([ l ], N) -> N := eps */
1627 add_trans(rtn, start, prod->end, NULL);
1630 /* cfg([ l ], N) -> N := N' plus cfg(l, N') */
1631 add_trans(rtn, start, prod->end, l->child);
1633 rtn_rules(rtn, l->child);
1637 /* At this point, we have propagated ktype and vtype */
1638 /* cfg([ l ], N) -> N := enc(l->ktype, l->vtype) */
1639 struct trans *t = add_trans(rtn, start, prod->end, NULL);
1641 t->re = subtree_atype(l->info, l->child->ktype, l->child->vtype);
1645 BUG_ON(true, rtn->info, "Unexpected lens type %d", rtn->lens_type);
1650 /* cfg(l?, N) -> N := N' | eps plus cfg(l, N') */
1651 add_trans(rtn, start, prod->end, l->child);
1653 add_trans(rtn, start, prod->end, NULL);
1655 rtn_rules(rtn, l->child);
1659 /* cfg(l, N) -> N := N' plus cfg(l->body, N') */
1660 add_trans(rtn, start, prod->end, l->body);
1662 rtn_rules(rtn, l->body);
1666 add_trans(rtn, start, prod->end, l->child);
1677 /* Replace transition t with two epsilon transitions s => p->start and
1678 * p->end => s->trans[i].to where s is the start of t. Instead of adding
1679 * epsilon transitions, we expand the epsilon transitions.
1681 static void prod_splice(struct rtn *rtn,
1682 struct prod *from, struct prod *to, struct trans *t) {
1684 add_trans(rtn, to->end, t->to, NULL);
1685 ERR_BAIL(from->lens->info);
1687 unref(t->re, regexp);
1693 static void rtn_splice(struct rtn *rtn, struct prod *prod) {
1694 for (struct state *s = prod->start; s != prod->end; s = s->next) {
1695 for (int i=0; i < s->ntrans; i++) {
1696 struct prod *p = prod_for_lens(rtn, s->trans[i].lens);
1698 prod_splice(rtn, prod, p, s->trans+i);
1707 static struct rtn *rtn_build(struct lens *rec, enum lens_type lt) {
1712 ERR_NOMEM(r < 0, rec->info);
1714 rtn->info = ref(rec->info);
1715 rtn->lens_type = lt;
1717 rtn_rules(rtn, rec);
1719 if (debugging("cf.approx"))
1720 rtn_dot(rtn, "10-rules");
1722 for (int i=0; i < rtn->nprod; i++) {
1723 rtn_splice(rtn, rtn->prod[i]);
1726 if (debugging("cf.approx"))
1727 rtn_dot(rtn, "11-splice");
1733 /* Compare transitions lexicographically by (to, lens) */
1734 static int trans_to_cmp(const void *v1, const void *v2) {
1735 const struct trans *t1 = v1;
1736 const struct trans *t2 = v2;
1738 if (t1->to != t2->to)
1739 return (t1->to < t2->to) ? -1 : 1;
1741 if (t1->lens == t2->lens)
1743 return (t1->lens < t2->lens) ? -1 : 1;
1746 /* Collapse a transition S1 -> S -> S2 by adding a transition S1 -> S2 with
1747 * lens R1 . (LOOP)* . R2 | R3 where R3 is the regexp on the possibly
1748 * existing transition S1 -> S2. If LOOP is NULL or R3 does not exist,
1749 * label the transition with a simplified regexp by treating NULL as
1751 static void collapse_trans(struct rtn *rtn,
1752 struct state *s1, struct state *s2,
1753 struct regexp *r1, struct regexp *loop,
1754 struct regexp *r2) {
1756 struct trans *t = NULL;
1757 struct regexp *r = NULL;
1759 for (int i=0; i < s1->ntrans; i++) {
1760 if (s1->trans[i].to == s2) {
1766 /* Set R = R1 . (LOOP)* . R2, treating NULL's as epsilon */
1770 else if (r2 == NULL)
1773 r = regexp_concat(rtn->info, r1, r2);
1775 struct regexp *s = regexp_iter(rtn->info, loop, 0, -1);
1776 ERR_NOMEM(s == NULL, rtn->info);
1777 struct regexp *c = NULL;
1782 c = regexp_concat(rtn->info, r1, s);
1784 ERR_NOMEM(c == NULL, rtn->info);
1790 r = regexp_concat(rtn->info, c, r2);
1792 ERR_NOMEM(r == NULL, rtn->info);
1797 t = add_trans(rtn, s1, s2, NULL);
1798 ERR_NOMEM(t == NULL, rtn->info);
1800 } else if (t->re == NULL) {
1801 if (r == NULL || regexp_matches_empty(r))
1804 t->re = regexp_maybe(rtn->info, r);
1806 ERR_NOMEM(t->re == NULL, rtn->info);
1808 } else if (r == NULL) {
1809 if (!regexp_matches_empty(t->re)) {
1810 r = regexp_maybe(rtn->info, t->re);
1811 unref(t->re, regexp);
1813 ERR_NOMEM(r == NULL, rtn->info);
1816 struct regexp *u = regexp_union(rtn->info, r, t->re);
1818 unref(t->re, regexp);
1820 ERR_NOMEM(u == NULL, rtn->info);
1825 rtn->exn = rtn->info->error->exn;
1829 /* Reduce the automaton with start state rprod->start and only accepting
1830 * state rprod->end so that we have a single transition rprod->start =>
1831 * rprod->end labelled with the overall approximating regexp for the
1834 * This is the same algorithm as fa_as_regexp in fa.c
1836 static struct regexp *rtn_reduce(struct rtn *rtn, struct lens *rec) {
1837 struct prod *prod = prod_for_lens(rtn, rec);
1840 ERR_THROW(prod == NULL, rtn->info, AUG_EINTERNAL,
1841 "No production for recursive lens");
1843 /* Eliminate epsilon transitions and turn transitions between the same
1844 * two states into a regexp union */
1845 list_for_each(s, rtn->states) {
1846 qsort(s->trans, s->ntrans, sizeof(*s->trans), trans_to_cmp);
1847 for (int i=0; i < s->ntrans; i++) {
1849 for (;j < s->ntrans && s->trans[i].to == s->trans[j].to;
1852 struct regexp *u, **v;
1853 r = ALLOC_N(v, j - i);
1854 ERR_NOMEM(r < 0, rtn->info);
1855 for (int k=i; k < j; k++)
1856 v[k-i] = s->trans[k].re;
1857 u = regexp_union_n(rtn->info, j - i, v);
1859 // FIXME: The calling convention for regexp_union_n
1860 // is bad, since we can't distinguish between alloc
1861 // failure and unioning all NULL's
1862 for (int k=0; k < j-i; k++)
1865 ERR_NOMEM(true, rtn->info);
1869 for (int k=i; k < j; k++) {
1870 unref(s->trans[k].lens, lens);
1871 unref(s->trans[k].re, regexp);
1874 MEMMOVE(s->trans + (i+1),
1877 s->ntrans -= j - (i + 1);
1882 /* Introduce new start and end states with epsilon transitions to/from
1883 * the old start and end states */
1884 struct state *end = NULL;
1885 struct state *start = NULL;
1886 if (ALLOC(start) < 0 || ALLOC(end) < 0) {
1889 ERR_NOMEM(true, rtn->info);
1891 list_insert_before(start, prod->start, rtn->states);
1892 end->next = prod->end->next;
1893 prod->end->next = end;
1895 add_trans(rtn, start, prod->start, NULL);
1897 add_trans(rtn, prod->end, end, NULL);
1900 prod->start = start;
1903 /* Eliminate states S (except for INI and FIN) one by one:
1904 * Let LOOP the regexp for the transition S -> S if it exists, epsilon
1906 * For all S1, S2 different from S with S1 -> S -> S2
1907 * Let R1 the regexp of S1 -> S
1908 * R2 the regexp of S -> S2
1909 * R3 the regexp of S1 -> S2 (or the regexp matching nothing
1910 * if no such transition)
1911 * set the regexp on the transition S1 -> S2 to
1912 * R1 . (LOOP)* . R2 | R3 */
1913 // FIXME: This does not go over all states
1914 list_for_each(s, rtn->states) {
1915 if (s == prod->end || s == prod->start)
1917 struct regexp *loop = NULL;
1918 for (int i=0; i < s->ntrans; i++) {
1919 if (s == s->trans[i].to) {
1920 ensure(loop == NULL, rtn->info);
1921 loop = s->trans[i].re;
1924 list_for_each(s1, rtn->states) {
1927 for (int t1=0; t1 < s1->ntrans; t1++) {
1928 if (s == s1->trans[t1].to) {
1929 for (int t2=0; t2 < s->ntrans; t2++) {
1930 struct state *s2 = s->trans[t2].to;
1933 collapse_trans(rtn, s1, s2,
1934 s1->trans[t1].re, loop,
1943 /* Find the overall regexp */
1944 struct regexp *result = NULL;
1945 for (int i=0; i < prod->start->ntrans; i++) {
1946 if (prod->start->trans[i].to == prod->end) {
1947 ensure(result == NULL, rtn->info);
1948 result = ref(prod->start->trans[i].re);
1956 static void propagate_type(struct lens *l, enum lens_type lt) {
1957 struct regexp **types = NULL;
1960 if (! l->recursive || ltype(l, lt) != NULL)
1965 r = ALLOC_N(types, l->nchildren);
1966 ERR_NOMEM(r < 0, l->info);
1967 for (int i=0; i < l->nchildren; i++) {
1968 propagate_type(l->children[i], lt);
1969 types[i] = ltype(l->children[i], lt);
1971 ltype(l, lt) = regexp_concat_n(l->info, l->nchildren, types);
1975 r = ALLOC_N(types, l->nchildren);
1976 ERR_NOMEM(r < 0, l->info);
1977 for (int i=0; i < l->nchildren; i++) {
1978 propagate_type(l->children[i], lt);
1979 types[i] = ltype(l->children[i], lt);
1981 ltype(l, lt) = regexp_union_n(l->info, l->nchildren, types);
1985 propagate_type(l->child, lt);
1987 l->atype = subtree_atype(l->info, l->child->ktype, l->child->vtype);
1989 l->ctype = ref(l->child->ctype);
1992 propagate_type(l->child, lt);
1993 ltype(l, lt) = regexp_iter(l->info, ltype(l->child, lt), 0, -1);
1996 propagate_type(l->child, lt);
1997 ltype(l, lt) = regexp_maybe(l->info, ltype(l->child, lt));
2003 propagate_type(l->child, lt);
2004 ltype(l, lt) = ref(ltype(l->child, lt));
2015 static struct value *typecheck(struct lens *l, int check);
2017 typedef struct value *typecheck_n_make(struct info *,
2018 struct lens *, struct lens *, int);
2020 static struct info *merge_info(struct info *i1, struct info *i2) {
2023 ERR_NOMEM(info == NULL, i1);
2025 info->filename = ref(i1->filename);
2026 info->first_line = i1->first_line;
2027 info->first_column = i1->first_column;
2028 info->last_line = i2->last_line;
2029 info->last_column = i2->last_column;
2030 info->error = i1->error;
2038 static struct value *typecheck_n(struct lens *l,
2039 typecheck_n_make *make, int check) {
2040 struct value *exn = NULL;
2041 struct lens *acc = NULL;
2043 ensure(l->tag == L_CONCAT || l->tag == L_UNION, l->info);
2044 for (int i=0; i < l->nchildren; i++) {
2045 exn = typecheck(l->children[i], check);
2049 acc = ref(l->children[0]);
2050 for (int i=1; i < l->nchildren; i++) {
2051 struct info *info = merge_info(acc->info, l->children[i]->info);
2052 ERR_BAIL(acc->info);
2053 exn = (*make)(info, acc, ref(l->children[i]), check);
2056 ensure(exn->tag == V_LENS, l->info);
2057 acc = ref(exn->lens);
2060 l->value = acc->value;
2067 static struct value *typecheck(struct lens *l, int check) {
2068 struct value *exn = NULL;
2070 /* Nonrecursive lenses are typechecked at build time */
2076 exn = typecheck_n(l, lns_make_concat, check);
2079 exn = typecheck_n(l, lns_make_union, check);
2083 exn = typecheck(l->child, check);
2087 exn = typecheck_iter(l->info, l->child);
2088 if (exn == NULL && l->value)
2089 exn = make_exn_value(l->info, "Multiple stores in iteration");
2090 if (exn == NULL && l->key)
2091 exn = make_exn_value(l->info, "Multiple keys/labels in iteration");
2095 exn = typecheck_maybe(l->info, l->child);
2096 l->key = l->child->key;
2097 l->value = l->child->value;
2110 static struct value *rtn_approx(struct lens *rec, enum lens_type lt) {
2111 struct rtn *rtn = NULL;
2112 struct value *result = NULL;
2114 rtn = rtn_build(rec, lt);
2116 ltype(rec, lt) = rtn_reduce(rtn, rec);
2118 if (debugging("cf.approx"))
2119 rtn_dot(rtn, "50-reduce");
2121 propagate_type(rec->body, lt);
2122 ERR_BAIL(rec->info);
2127 if (debugging("cf.approx")) {
2128 printf("approx %s => ", lens_type_names[lt]);
2129 print_regexp(stdout, ltype(rec, lt));
2135 if (rtn->exn == NULL)
2136 result = rec->info->error->exn;
2138 result = ref(rtn->exn);
2142 static struct value *
2143 exn_multiple_epsilons(struct lens *lens,
2144 struct lens *l1, struct lens *l2) {
2146 struct value *exn = NULL;
2148 exn = make_exn_value(ref(lens->info),
2149 "more than one nullable branch in a union");
2150 fi = format_info(l1->info);
2151 exn_printf_line(exn, "First nullable lens: %s", fi);
2154 fi = format_info(l2->info);
2155 exn_printf_line(exn, "Second nullable lens: %s", fi);
2161 /* Update lens->ctype_nullable and return 1 if there was a change,
2162 * 0 if there was none */
2163 static int ctype_nullable(struct lens *lens, struct value **exn) {
2166 struct lens *null_lens = NULL;
2168 if (! lens->recursive)
2174 for (int i=0; i < lens->nchildren; i++) {
2175 if (ctype_nullable(lens->children[i], exn))
2177 if (! lens->children[i]->ctype_nullable)
2182 for (int i=0; i < lens->nchildren; i++) {
2183 if (ctype_nullable(lens->children[i], exn))
2185 if (lens->children[i]->ctype_nullable) {
2187 *exn = exn_multiple_epsilons(lens, null_lens,
2192 null_lens = lens->children[i];
2198 ret = ctype_nullable(lens->child, exn);
2199 nullable = lens->child->ctype_nullable;
2206 nullable = lens->body->ctype_nullable;
2214 if (nullable != lens->ctype_nullable) {
2216 lens->ctype_nullable = nullable;
2221 struct value *lns_check_rec(struct info *info,
2222 struct lens *body, struct lens *rec,
2224 /* The types in the order of approximation */
2225 static const enum lens_type types[] = { KTYPE, VTYPE, ATYPE };
2226 struct value *result = NULL;
2228 ensure(rec->tag == L_REC, info);
2229 ensure(rec->rec_internal, info);
2231 /* The user might have written down a regular lens with 'let rec' */
2232 if (! body->recursive) {
2233 result = make_lens_value(ref(body));
2234 ERR_NOMEM(result == NULL, info);
2238 /* To help memory management, we avoid the cycle inherent ina recursive
2239 * lens by using two instances of an L_REC lens. One is marked with
2240 * rec_internal, and used inside the body of the lens. The other is the
2241 * "toplevel" which receives external references.
2243 * The internal instance of the recursive lens is REC, the external one
2244 * is TOP, constructed below
2246 rec->body = body; /* REC does not own BODY */
2248 for (int i=0; i < ARRAY_CARDINALITY(types); i++) {
2249 result = rtn_approx(rec, types[i]);
2253 if (rec->atype == NULL) {
2254 result = make_exn_value(ref(rec->info),
2255 "recursive lens generates the empty language for its %s",
2256 rec->ctype == NULL ? "ctype" : "atype");
2260 rec->key = rec->body->key;
2261 rec->value = rec->body->value;
2262 rec->consumes_value = rec->body->consumes_value;
2264 while(ctype_nullable(rec->body, &result));
2267 rec->ctype_nullable = rec->body->ctype_nullable;
2269 result = typecheck(rec->body, check);
2273 result = lns_make_rec(ref(rec->info));
2274 struct lens *top = result->lens;
2275 for (int t=0; t < ntypes; t++)
2276 ltype(top, t) = ref(ltype(rec, t));
2277 top->value = rec->value;
2278 top->key = rec->key;
2279 top->consumes_value = rec->consumes_value;
2280 top->ctype_nullable = rec->ctype_nullable;
2281 top->body = ref(body);
2283 top->rec_internal = 0;
2286 top->jmt = jmt_build(top);
2291 if (result != NULL && result->tag != V_EXN)
2292 unref(result, value);
2294 result = info->error->exn;
2299 void dump_lens_tree(struct lens *lens){
2300 static int count = 0;
2303 fp = debug_fopen("lens_%02d_%s.dot", count++, ltag(lens));
2307 fprintf(fp, "digraph \"%s\" {\n", "lens");
2308 dump_lens(fp, lens);
2314 void dump_lens(FILE *out, struct lens *lens){
2318 fprintf(out, "\"%p\" [ shape = box, label = \"%s\\n", lens, ltag(lens));
2320 for (int t=0; t < ntypes; t++) {
2321 re = ltype(lens, t);
2324 fprintf(out, "%s=",lens_type_names[t]);
2325 print_regexp(out, re);
2326 fprintf(out, "\\n");
2329 fprintf(out, "recursive=%x\\n", lens->recursive);
2330 fprintf(out, "rec_internal=%x\\n", lens->rec_internal);
2331 fprintf(out, "consumes_value=%x\\n", lens->consumes_value);
2332 fprintf(out, "ctype_nullable=%x\\n", lens->ctype_nullable);
2333 fprintf(out, "\"];\n");
2350 for(i = 0; i<lens->nchildren;i++){
2351 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->children[i]);
2352 dump_lens(out, lens->children[i]);
2356 for(i = 0; i<lens->nchildren;i++){
2357 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->children[i]);
2358 dump_lens(out, lens->children[i]);
2362 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2363 dump_lens(out, lens->child);
2366 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2367 dump_lens(out, lens->child);
2371 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2372 dump_lens(out, lens->child);
2376 if (lens->rec_internal == 0){
2377 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2378 dump_lens(out, lens->body);
2382 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2383 dump_lens(out, lens->child);
2386 fprintf(out, "ERROR\n");
2394 * indent-tabs-mode: nil