4 * Copyright (C) 2007-2015 David Lutterkort
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * Author: David Lutterkort <dlutter@redhat.com>
31 /* This enum must be kept in sync with type_offs and ntypes */
33 CTYPE, ATYPE, KTYPE, VTYPE
36 static const int const type_offs[] = {
37 offsetof(struct lens, ctype),
38 offsetof(struct lens, atype),
39 offsetof(struct lens, ktype),
40 offsetof(struct lens, vtype)
42 static const int ntypes = sizeof(type_offs)/sizeof(type_offs[0]);
44 static const char *lens_type_names[] =
45 { "ctype", "atype", "ktype", "vtype" };
47 #define ltype(lns, t) *((struct regexp **) ((char *) lns + type_offs[t]))
49 static struct value * typecheck_union(struct info *,
50 struct lens *l1, struct lens *l2);
51 static struct value *typecheck_concat(struct info *,
52 struct lens *l1, struct lens *l2);
53 static struct value *typecheck_square(struct info *,
54 struct lens *l1, struct lens *l2);
55 static struct value *typecheck_iter(struct info *info, struct lens *l);
56 static struct value *typecheck_maybe(struct info *info, struct lens *l);
58 /* Lens names for pretty printing */
59 /* keep order in sync with enum type */
60 static const char *const tags[] = {
61 "del", "store", "value", "key", "label", "seq", "counter",
63 "subtree", "star", "maybe", "rec", "square"
66 #define ltag(lens) (tags[lens->tag - L_DEL])
68 static const struct string digits_string = {
69 .ref = REF_MAX, .str = (char *) "[0123456789]+"
71 static const struct string *const digits_pat = &digits_string;
73 char *format_lens(struct lens *l) {
74 char *inf = format_info(l->info);
77 xasprintf(&result, "%s[%s]%s", tags[l->tag - L_DEL], inf,
78 l->recursive ? "R" : "r");
83 #define BUG_LENS_TAG(lns) bug_lens_tag(lns, __FILE__, __LINE__)
85 static void bug_lens_tag(struct lens *lens, const char *file, int lineno) {
86 char *s = format_lens(lens);
88 if (lens != NULL && lens->info != NULL && lens->info->error != NULL) {
89 bug_on(lens->info->error, file, lineno, "Unexpected lens tag %s", s);
91 /* We are really screwed */
98 /* Construct a finite automaton from REGEXP and return it in *FA.
100 * Return NULL if REGEXP is valid, if the regexp REGEXP has syntax errors,
101 * return an exception.
103 static struct value *str_to_fa(struct info *info, const char *pattern,
104 struct fa **fa, int nocase) {
106 struct value *exn = NULL;
108 char *re_str = NULL, *re_err = NULL;
111 error = fa_compile(pattern, strlen(pattern), fa);
112 if (error == REG_NOERROR) {
114 error = fa_nocase(*fa);
115 ERR_NOMEM(error < 0, info);
120 re_str = escape(pattern, -1, RX_ESCAPES);
121 ERR_NOMEM(re_str == NULL, info);
123 exn = make_exn_value(info, "Invalid regular expression /%s/", re_str);
125 re_err_len = regerror(error, NULL, NULL, 0);
126 error = ALLOC_N(re_err, re_err_len);
127 ERR_NOMEM(error < 0, info);
129 regerror(error, NULL, re_err, re_err_len);
130 exn_printf_line(exn, "%s", re_err);
139 exn = info->error->exn;
143 static struct value *regexp_to_fa(struct regexp *regexp, struct fa **fa) {
144 return str_to_fa(regexp->info, regexp->pattern->str, fa, regexp->nocase);
147 static struct lens *make_lens(enum lens_tag tag, struct info *info) {
156 static struct lens *make_lens_unop(enum lens_tag tag, struct info *info,
157 struct lens *child) {
158 struct lens *lens = make_lens(tag, info);
160 lens->value = child->value;
161 lens->key = child->key;
165 typedef struct regexp *regexp_combinator(struct info *, int, struct regexp **);
167 static struct lens *make_lens_binop(enum lens_tag tag, struct info *info,
168 struct lens *l1, struct lens *l2,
169 regexp_combinator *combinator) {
170 struct lens *lens = make_lens(tag, info);
171 int n1 = (l1->tag == tag) ? l1->nchildren : 1;
172 struct regexp **types = NULL;
177 lens->nchildren = n1;
178 lens->nchildren += (l2->tag == tag) ? l2->nchildren : 1;
180 lens->recursive = l1->recursive || l2->recursive;
181 lens->rec_internal = l1->rec_internal || l2->rec_internal;
183 if (ALLOC_N(lens->children, lens->nchildren) < 0) {
188 if (l1->tag == tag) {
189 for (int i=0; i < l1->nchildren; i++)
190 lens->children[i] = ref(l1->children[i]);
193 lens->children[0] = l1;
196 if (l2->tag == tag) {
197 for (int i=0; i < l2->nchildren; i++)
198 lens->children[n1 + i] = ref(l2->children[i]);
201 lens->children[n1] = l2;
204 for (int i=0; i < lens->nchildren; i++) {
205 lens->value = lens->value || lens->children[i]->value;
206 lens->key = lens->key || lens->children[i]->key;
209 if (ALLOC_N(types, lens->nchildren) < 0)
212 if (! lens->rec_internal) {
213 /* Inside a recursive lens, we assign types with lns_check_rec
214 * once we know the entire lens */
215 for (int t=0; t < ntypes; t++) {
216 if (lens->recursive && t == CTYPE)
218 for (int i=0; i < lens->nchildren; i++)
219 types[i] = ltype(lens->children[i], t);
220 ltype(lens, t) = (*combinator)(info, lens->nchildren, types);
225 for (int i=0; i < lens->nchildren; i++)
226 ensure(tag != lens->children[i]->tag, lens->info);
235 static struct value *make_lens_value(struct lens *lens) {
237 v = make_value(V_LENS, ref(lens->info));
242 struct value *lns_make_union(struct info *info,
243 struct lens *l1, struct lens *l2, int check) {
244 struct lens *lens = NULL;
245 int consumes_value = l1->consumes_value && l2->consumes_value;
246 int recursive = l1->recursive || l2->recursive;
247 int ctype_nullable = l1->ctype_nullable || l2->ctype_nullable;
250 struct value *exn = typecheck_union(info, l1, l2);
255 lens = make_lens_binop(L_UNION, info, l1, l2, regexp_union_n);
256 lens->consumes_value = consumes_value;
258 lens->ctype_nullable = ctype_nullable;
259 return make_lens_value(lens);
262 struct value *lns_make_concat(struct info *info,
263 struct lens *l1, struct lens *l2, int check) {
264 struct lens *lens = NULL;
265 int consumes_value = l1->consumes_value || l2->consumes_value;
266 int recursive = l1->recursive || l2->recursive;
267 int ctype_nullable = l1->ctype_nullable && l2->ctype_nullable;
270 struct value *exn = typecheck_concat(info, l1, l2);
274 if (l1->value && l2->value) {
275 return make_exn_value(info, "Multiple stores in concat");
277 if (l1->key && l2->key) {
278 return make_exn_value(info, "Multiple keys/labels in concat");
281 lens = make_lens_binop(L_CONCAT, info, l1, l2, regexp_concat_n);
282 lens->consumes_value = consumes_value;
284 lens->ctype_nullable = ctype_nullable;
285 return make_lens_value(lens);
288 static struct regexp *subtree_atype(struct info *info,
289 struct regexp *ktype,
290 struct regexp *vtype) {
291 const char *kpat = (ktype == NULL) ? ENC_NULL : ktype->pattern->str;
292 const char *vpat = (vtype == NULL) ? ENC_NULL : vtype->pattern->str;
294 struct regexp *result = NULL;
295 char *ks = NULL, *vs = NULL;
298 if (ktype != NULL && vtype != NULL && ktype->nocase != vtype->nocase) {
299 ks = regexp_expand_nocase(ktype);
300 vs = regexp_expand_nocase(vtype);
301 ERR_NOMEM(ks == NULL || vs == NULL, info);
302 if (asprintf(&pat, "(%s)%s(%s)%s", ks, ENC_EQ, vs, ENC_SLASH) < 0)
303 ERR_NOMEM(true, info);
306 if (asprintf(&pat, "(%s)%s(%s)%s", kpat, ENC_EQ, vpat, ENC_SLASH) < 0)
307 ERR_NOMEM(pat == NULL, info);
311 nocase = ktype->nocase;
312 else if (vtype != NULL)
313 nocase = vtype->nocase;
315 result = make_regexp(info, pat, nocase);
323 * A subtree lens l1 = [ l ]
325 * Types are assigned as follows:
327 * l1->ctype = l->ctype
328 * l1->atype = encode(l->ktype, l->vtype)
332 struct value *lns_make_subtree(struct info *info, struct lens *l) {
335 lens = make_lens_unop(L_SUBTREE, info, l);
336 lens->ctype = ref(l->ctype);
338 lens->atype = subtree_atype(info, l->ktype, l->vtype);
339 lens->value = lens->key = 0;
340 lens->recursive = l->recursive;
341 lens->rec_internal = l->rec_internal;
343 lens->ctype_nullable = l->ctype_nullable;
344 return make_lens_value(lens);
347 struct value *lns_make_star(struct info *info, struct lens *l, int check) {
351 struct value *exn = typecheck_iter(info, l);
356 return make_exn_value(info, "Multiple stores in iteration");
359 return make_exn_value(info, "Multiple keys/labels in iteration");
362 lens = make_lens_unop(L_STAR, info, l);
363 for (int t = 0; t < ntypes; t++) {
364 ltype(lens, t) = regexp_iter(info, ltype(l, t), 0, -1);
366 lens->recursive = l->recursive;
367 lens->rec_internal = l->rec_internal;
368 lens->ctype_nullable = 1;
369 return make_lens_value(lens);
372 struct value *lns_make_plus(struct info *info, struct lens *l, int check) {
373 struct value *star, *conc;
375 star = lns_make_star(info, l, check);
379 conc = lns_make_concat(ref(info), ref(l), ref(star->lens), check);
384 struct value *lns_make_maybe(struct info *info, struct lens *l, int check) {
388 struct value *exn = typecheck_maybe(info, l);
392 lens = make_lens_unop(L_MAYBE, info, l);
393 for (int t=0; t < ntypes; t++)
394 ltype(lens, t) = regexp_maybe(info, ltype(l, t));
395 lens->value = l->value;
397 lens->recursive = l->recursive;
398 lens->rec_internal = l->rec_internal;
399 lens->ctype_nullable = 1;
400 return make_lens_value(lens);
403 /* The ctype of SQR is a regular approximation of the true ctype of SQR
404 * at this point. In some situations, for example in processing quoted
405 * strings this leads to false typecheck errors; to lower the chances
406 * of these, we try to construct the precise ctype of SQR if the
407 * language of L1 is finite (and has a small number of words)
409 static void square_precise_type(struct info *info,
412 struct regexp *body) {
416 struct fa *fa = NULL;
417 struct value *exn = NULL;
418 struct regexp **u = NULL, *c[3], *w = NULL;
420 exn = str_to_fa(info, left->pattern->str, &fa, left->nocase);
424 nwords = fa_enumerate(fa, 10, &words); /* The limit of 10 is arbitrary */
428 r = ALLOC_N(u, nwords);
429 ERR_NOMEM(r < 0, info);
432 for (int i=0; i < nwords; i++) {
433 w = make_regexp_literal(left->info, words[i]);
434 ERR_NOMEM(w == NULL, info);
435 w->nocase = left->nocase;
438 u[i] = regexp_concat_n(info, 3, c);
441 ERR_NOMEM(u[i] == NULL, info);
443 w = regexp_union_n(info, nwords, u);
452 for (int i=0; i < nwords; i++) {
463 /* Build a square lens as
464 * left . body . right
465 * where left and right accepts the same language and
466 * captured strings must match. The inability to express this with other
467 * lenses makes the square primitive necessary.
469 struct value * lns_make_square(struct info *info, struct lens *l1,
470 struct lens *l2, struct lens *l3, int check) {
471 struct value *cnt1 = NULL, *cnt2 = NULL, *res = NULL;
472 struct lens *sqr = NULL;
474 /* supported types: L_KEY . body . L_DEL or L_DEL . body . L_DEL */
475 if (l3->tag != L_DEL || (l1->tag != L_DEL && l1->tag != L_KEY))
476 return make_exn_value(info, "Supported types: (key lns del) or (del lns del)");
478 res = typecheck_square(info, l1, l3);
482 res = lns_make_concat(ref(info), ref(l1), ref(l2), check);
486 res = lns_make_concat(ref(info), ref(cnt1->lens), ref(l3), check);
491 sqr = make_lens_unop(L_SQUARE, ref(info), ref(cnt2->lens));
492 ERR_NOMEM(sqr == NULL, info);
494 for (int t=0; t < ntypes; t++)
495 ltype(sqr, t) = ref(ltype(cnt2->lens, t));
497 square_precise_type(info, &(sqr->ctype), l1->ctype, l2->ctype);
499 sqr->recursive = cnt2->lens->recursive;
500 sqr->rec_internal = cnt2->lens->rec_internal;
501 sqr->consumes_value = cnt2->lens->consumes_value;
503 res = make_lens_value(sqr);
504 ERR_NOMEM(res == NULL, info);
522 static struct regexp *make_regexp_from_string(struct info *info,
523 struct string *string) {
528 r->pattern = ref(string);
534 static struct regexp *restrict_regexp(struct regexp *r) {
536 struct regexp *result = NULL;
540 ret = fa_restrict_alphabet(r->pattern->str, strlen(r->pattern->str),
542 RESERVED_FROM, RESERVED_TO);
543 ERR_NOMEM(ret == REG_ESPACE || ret < 0, r->info);
544 BUG_ON(ret != 0, r->info, NULL);
545 ensure(nre_len == strlen(nre), r->info);
547 ret = regexp_c_locale(&nre, &nre_len);
548 ERR_NOMEM(ret < 0, r->info);
550 result = make_regexp(r->info, nre, r->nocase);
552 BUG_ON(regexp_compile(result) != 0, r->info,
553 "Could not compile restricted regexp");
558 unref(result, regexp);
562 struct value *lns_make_prim(enum lens_tag tag, struct info *info,
563 struct regexp *regexp, struct string *string) {
564 struct lens *lens = NULL;
565 struct value *exn = NULL;
566 struct fa *fa_slash = NULL;
567 struct fa *fa_key = NULL;
568 struct fa *fa_isect = NULL;
572 exn = str_to_fa(info, "(.|\n)*/(.|\n)*", &fa_slash, regexp->nocase);
576 exn = regexp_to_fa(regexp, &fa_key);
580 fa_isect = fa_intersect(fa_slash, fa_key);
581 if (! fa_is_basic(fa_isect, FA_EMPTY)) {
582 exn = make_exn_value(info,
583 "The key regexp /%s/ matches a '/'",
584 regexp->pattern->str);
590 fa_isect = fa_key = fa_slash = NULL;
591 } else if (tag == L_LABEL) {
592 if (strchr(string->str, SEP) != NULL) {
593 exn = make_exn_value(info,
594 "The label string \"%s\" contains a '/'",
598 } else if (tag == L_DEL && string != NULL) {
600 const char *dflt = string->str;
601 cnt = regexp_match(regexp, dflt, strlen(dflt), 0, NULL);
602 if (cnt != strlen(dflt)) {
603 char *s = escape(dflt, -1, RX_ESCAPES);
604 char *r = regexp_escape(regexp);
605 exn = make_exn_value(info,
606 "del: the default value '%s' does not match /%s/",
614 /* Build the actual lens */
615 lens = make_lens(tag, info);
616 lens->regexp = regexp;
617 lens->string = string;
618 lens->key = (tag == L_KEY || tag == L_LABEL || tag == L_SEQ);
619 lens->value = (tag == L_STORE || tag == L_VALUE);
620 lens->consumes_value = (tag == L_STORE || tag == L_VALUE);
621 lens->atype = regexp_make_empty(info);
623 if (tag == L_DEL || tag == L_STORE || tag == L_KEY) {
624 lens->ctype = ref(regexp);
625 lens->ctype_nullable = regexp_matches_empty(lens->ctype);
626 } else if (tag == L_LABEL || tag == L_VALUE
627 || tag == L_SEQ || tag == L_COUNTER) {
628 lens->ctype = regexp_make_empty(info);
629 lens->ctype_nullable = 1;
639 make_regexp_from_string(info, (struct string *) digits_pat);
640 if (lens->ktype == NULL)
642 } else if (tag == L_KEY) {
643 lens->ktype = restrict_regexp(lens->regexp);
644 } else if (tag == L_LABEL) {
645 lens->ktype = make_regexp_literal(info, lens->string->str);
646 if (lens->ktype == NULL)
651 if (tag == L_STORE) {
652 lens->vtype = restrict_regexp(lens->regexp);
653 } else if (tag == L_VALUE) {
654 lens->vtype = make_regexp_literal(info, lens->string->str);
657 return make_lens_value(lens);
666 * Typechecking of lenses
668 static struct value *disjoint_check(struct info *info, bool is_get,
669 struct regexp *r1, struct regexp *r2) {
670 struct fa *fa1 = NULL;
671 struct fa *fa2 = NULL;
672 struct fa *fa = NULL;
673 struct value *exn = NULL;
674 const char *const msg = is_get ? "union.get" : "tree union.put";
676 if (r1 == NULL || r2 == NULL)
679 exn = regexp_to_fa(r1, &fa1);
683 exn = regexp_to_fa(r2, &fa2);
687 fa = fa_intersect(fa1, fa2);
688 if (! fa_is_basic(fa, FA_EMPTY)) {
691 fa_example(fa, &xmpl, &xmpl_len);
693 char *fmt = enc_format(xmpl, xmpl_len);
699 exn = make_exn_value(ref(info),
700 "overlapping lenses in %s", msg);
703 exn_printf_line(exn, "Example matched by both: '%s'", xmpl);
705 exn_printf_line(exn, "Example matched by both: %s", xmpl);
717 static struct value *typecheck_union(struct info *info,
718 struct lens *l1, struct lens *l2) {
719 struct value *exn = NULL;
721 exn = disjoint_check(info, true, l1->ctype, l2->ctype);
723 exn = disjoint_check(info, false, l1->atype, l2->atype);
726 char *fi = format_info(l1->info);
727 exn_printf_line(exn, "First lens: %s", fi);
730 fi = format_info(l2->info);
731 exn_printf_line(exn, "Second lens: %s", fi);
737 static struct value *
738 ambig_check(struct info *info, struct fa *fa1, struct fa *fa2,
739 enum lens_type typ, struct lens *l1, struct lens *l2,
740 const char *msg, bool iterated) {
743 struct value *exn = NULL;
746 r = fa_ambig_example(fa1, fa2, &upv, &upv_len, &pv, &v);
748 exn = make_exn_value(ref(info), "not enough memory");
752 ERR_REPORT(info, AUG_ENOMEM, NULL);
753 return info->error->exn;
758 char *e_u, *e_up, *e_upv, *e_pv, *e_v;
762 e_u = enc_format(upv, pv - upv);
763 e_up = enc_format(upv, v - upv);
764 e_upv = enc_format(upv, upv_len);
765 e_pv = enc_format(pv, strlen(pv));
766 e_v = enc_format(v, strlen(v));
767 lns_format_atype(l1, &s1);
768 lns_format_atype(l2, &s2);
770 e_u = escape(upv, pv - upv, RX_ESCAPES);
771 e_up = escape(upv, v - upv, RX_ESCAPES);
772 e_upv = escape(upv, -1, RX_ESCAPES);
773 e_pv = escape(pv, -1, RX_ESCAPES);
774 e_v = escape(v, -1, RX_ESCAPES);
775 s1 = regexp_escape(ltype(l1, typ));
776 s2 = regexp_escape(ltype(l2, typ));
778 exn = make_exn_value(ref(info), "%s", msg);
780 exn_printf_line(exn, " Iterated regexp: /%s/", s1);
782 exn_printf_line(exn, " First regexp: /%s/", s1);
783 exn_printf_line(exn, " Second regexp: /%s/", s2);
785 exn_printf_line(exn, " '%s' can be split into", e_upv);
786 exn_printf_line(exn, " '%s|=|%s'\n", e_u, e_pv);
787 exn_printf_line(exn, " and");
788 exn_printf_line(exn, " '%s|=|%s'\n", e_up, e_v);
801 static struct value *
802 ambig_concat_check(struct info *info, const char *msg,
803 enum lens_type typ, struct lens *l1, struct lens *l2) {
804 struct fa *fa1 = NULL;
805 struct fa *fa2 = NULL;
806 struct value *result = NULL;
807 struct regexp *r1 = ltype(l1, typ);
808 struct regexp *r2 = ltype(l2, typ);
810 if (r1 == NULL || r2 == NULL)
813 result = regexp_to_fa(r1, &fa1);
817 result = regexp_to_fa(r2, &fa2);
821 result = ambig_check(info, fa1, fa2, typ, l1, l2, msg, false);
828 static struct value *typecheck_concat(struct info *info,
829 struct lens *l1, struct lens *l2) {
830 struct value *result = NULL;
832 result = ambig_concat_check(info, "ambiguous concatenation",
834 if (result == NULL) {
835 result = ambig_concat_check(info, "ambiguous tree concatenation",
838 if (result != NULL) {
839 char *fi = format_info(l1->info);
840 exn_printf_line(result, "First lens: %s", fi);
842 fi = format_info(l2->info);
843 exn_printf_line(result, "Second lens: %s", fi);
849 static struct value *make_exn_square(struct info *info, struct lens *l1,
850 struct lens *l2, const char *msg) {
853 struct value *exn = make_exn_value(ref(info), "%s",
854 "Inconsistency in lens square");
855 exn_printf_line(exn, "%s", msg);
856 fi = format_info(l1->info);
857 exn_printf_line(exn, "Left lens: %s", fi);
859 fi = format_info(l2->info);
860 exn_printf_line(exn, "Right lens: %s", fi);
865 static struct value *typecheck_square(struct info *info, struct lens *l1,
868 struct value *exn = NULL;
869 struct fa *fa1 = NULL, *fa2 = NULL;
870 struct regexp *r1 = ltype(l1, CTYPE);
871 struct regexp *r2 = ltype(l2, CTYPE);
873 if (r1 == NULL || r2 == NULL)
876 exn = regexp_to_fa(r1, &fa1);
880 exn = regexp_to_fa(r2, &fa2);
884 r = fa_equals(fa1, fa2);
887 exn = make_exn_value(ref(info), "not enough memory");
891 ERR_REPORT(info, AUG_ENOMEM, NULL);
892 return info->error->exn;;
897 exn = make_exn_square(info, l1, l2,
898 "Left and right lenses must accept the same language");
902 /* check del create consistency */
903 if (l1->tag == L_DEL && l2->tag == L_DEL) {
904 if (!STREQ(l1->string->str, l2->string->str)) {
905 exn = make_exn_square(info, l1, l2,
906 "Left and right lenses must have the same default value");
917 static struct value *
918 ambig_iter_check(struct info *info, const char *msg,
919 enum lens_type typ, struct lens *l) {
920 struct fa *fas = NULL, *fa = NULL;
921 struct value *result = NULL;
922 struct regexp *r = ltype(l, typ);
927 result = regexp_to_fa(r, &fa);
931 fas = fa_iter(fa, 0, -1);
933 result = ambig_check(info, fa, fas, typ, l, l, msg, true);
941 static struct value *typecheck_iter(struct info *info, struct lens *l) {
942 struct value *result = NULL;
944 result = ambig_iter_check(info, "ambiguous iteration", CTYPE, l);
945 if (result == NULL) {
946 result = ambig_iter_check(info, "ambiguous tree iteration", ATYPE, l);
948 if (result != NULL) {
949 char *fi = format_info(l->info);
950 exn_printf_line(result, "Iterated lens: %s", fi);
956 static struct value *typecheck_maybe(struct info *info, struct lens *l) {
957 /* Check (r)? as (<e>|r) where <e> is the empty language */
958 struct value *exn = NULL;
960 if (l->ctype != NULL && regexp_matches_empty(l->ctype)) {
961 exn = make_exn_value(ref(info),
962 "illegal optional expression: /%s/ matches the empty word",
963 l->ctype->pattern->str);
966 /* Typecheck the put direction; the check passes if
967 (1) the atype does not match the empty string, because we can tell
968 from looking at tree nodes whether L should be applied or not
969 (2) L handles a value; with that, we know whether to apply L or not
970 depending on whether the current node has a non NULL value or not
972 if (exn == NULL && ! l->consumes_value) {
973 if (l->atype != NULL && regexp_matches_empty(l->atype)) {
974 exn = make_exn_value(ref(info),
975 "optional expression matches the empty tree but does not consume a value");
981 void free_lens(struct lens *lens) {
984 ensure(lens->ref == 0, lens->info);
986 if (debugging("lenses"))
987 dump_lens_tree(lens);
990 unref(lens->regexp, regexp);
991 unref(lens->string, string);
995 unref(lens->regexp, regexp);
1001 unref(lens->string, string);
1007 unref(lens->child, lens);
1011 for (int i=0; i < lens->nchildren; i++)
1012 unref(lens->children[i], lens);
1013 free(lens->children);
1016 if (!lens->rec_internal) {
1017 unref(lens->body, lens);
1025 for (int t=0; t < ntypes; t++)
1026 unref(ltype(lens, t), regexp);
1028 unref(lens->info, info);
1029 jmt_free(lens->jmt);
1035 void lens_release(struct lens *lens) {
1039 for (int t=0; t < ntypes; t++)
1040 regexp_release(ltype(lens, t));
1042 if (lens->tag == L_KEY || lens->tag == L_STORE)
1043 regexp_release(lens->regexp);
1045 if (lens->tag == L_SUBTREE || lens->tag == L_STAR
1046 || lens->tag == L_MAYBE || lens->tag == L_SQUARE) {
1047 lens_release(lens->child);
1050 if (lens->tag == L_UNION || lens->tag == L_CONCAT) {
1051 for (int i=0; i < lens->nchildren; i++) {
1052 lens_release(lens->children[i]);
1056 jmt_free(lens->jmt);
1061 * Encoding of tree levels
1063 char *enc_format(const char *e, size_t len) {
1064 return enc_format_indent(e, len, 0);
1067 char *enc_format_indent(const char *e, size_t len, int indent) {
1069 char *result = NULL, *r;
1072 while (*k && k - e < len) {
1073 char *eq, *slash, *v;
1074 eq = strchr(k, ENC_EQ_CH);
1076 slash = strchr(eq, ENC_SLASH_CH);
1077 assert(slash != NULL);
1082 size += 6; /* Surrounding braces */
1084 size += 1 + (eq - k) + 1;
1086 size += 4 + (slash - v) + 1;
1089 if (ALLOC_N(result, size + 1) < 0)
1094 while (*k && k - e < len) {
1095 char *eq, *slash, *v;
1096 eq = strchr(k, ENC_EQ_CH);
1097 slash = strchr(eq, ENC_SLASH_CH);
1098 assert(eq != NULL && slash != NULL);
1101 for (int i=0; i < indent; i++)
1103 r = stpcpy(r, " { ");
1105 r = stpcpy(r, "\"");
1106 r = stpncpy(r, k, eq - k);
1107 r = stpcpy(r, "\"");
1110 r = stpcpy (r, " = \"");
1111 r = stpncpy(r, v, slash - v);
1112 r = stpcpy(r, "\"");
1114 r = stpcpy(r, " }");
1122 static int format_atype(struct lens *l, char **buf, uint indent);
1124 static int format_indent(char **buf, uint indent) {
1125 if (ALLOC_N(*buf, indent+1) < 0)
1127 memset(*buf, ' ', indent);
1131 static int format_subtree_atype(struct lens *l, char **buf, uint indent) {
1132 char *k = NULL, *v = NULL;
1133 const struct regexp *ktype = l->child->ktype;
1134 const struct regexp *vtype = l->child->vtype;
1138 if (format_indent(&si, indent) < 0)
1141 if (ktype != NULL) {
1142 k = regexp_escape(ktype);
1146 if (vtype != NULL) {
1147 v = regexp_escape(vtype);
1151 r = xasprintf(buf, "%s{ = /%s/ }", si, k, v);
1153 r = xasprintf(buf, "%s{ /%s/ = /%s/ }", si, k, v);
1156 r = xasprintf(buf, "%s{ }", si, k);
1158 r = xasprintf(buf, "%s{ /%s/ }", si, k);
1171 static int format_rep_atype(struct lens *l, char **buf,
1172 uint indent, char quant) {
1176 r = format_atype(l->child, &a, indent);
1179 if (strlen(a) == 0) {
1186 if (l->child->tag == L_CONCAT || l->child->tag == L_UNION)
1187 r = xasprintf(buf, "(%s)%c", a, quant);
1189 r = xasprintf(buf, "%s%c", a, quant);
1200 static int format_concat_atype(struct lens *l, char **buf, uint indent) {
1201 char **c = NULL, *s = NULL, *p;
1203 size_t len = 0, nconc = 0;
1205 if (ALLOC_N(c, l->nchildren) < 0)
1208 for (int i=0; i < l->nchildren; i++) {
1209 r = format_atype(l->children[i], c+i, indent);
1212 len += strlen(c[i]) + 3;
1213 if (strlen(c[i]) > 0)
1215 if (l->children[i]->tag == L_UNION)
1219 if (ALLOC_N(s, len+1) < 0)
1222 for (int i=0; i < l->nchildren; i++) {
1223 bool needs_parens = nconc > 1 && l->children[i]->tag == L_UNION;
1224 if (strlen(c[i]) == 0)
1230 for (int j=0; j < indent; j++)
1244 for (int i=0; i < l->nchildren; i++)
1251 static int format_union_atype(struct lens *l, char **buf, uint indent) {
1252 char **c = NULL, *s = NULL, *p;
1256 if (ALLOC_N(c, l->nchildren) < 0)
1259 /* Estimate the length of the string we will build. The calculation
1260 overestimates that length so that the logic is a little simpler than
1261 in the loop where we actually build the string */
1262 for (int i=0; i < l->nchildren; i++) {
1263 r = format_atype(l->children[i], c+i, indent + 2);
1266 /* We will add c[i] and some fixed characters */
1267 len += strlen(c[i]) + strlen("\n| ()");
1268 if (strlen(c[i]) < indent+2) {
1269 /* We will add indent+2 whitespace */
1274 if (ALLOC_N(s, len+1) < 0)
1278 for (int i=0; i < l->nchildren; i++) {
1282 if (strlen(t) >= indent+2) {
1283 /* c[i] is not just whitespace */
1284 p = stpncpy(p, t, indent+2);
1287 /* c[i] is just whitespace, make sure we indent the
1288 '|' appropriately */
1289 memset(p, ' ', indent+2);
1292 p = stpcpy(p, "| ");
1294 /* Skip additional indent */
1298 p = stpcpy(p, "()");
1307 for (int i=0; i < l->nchildren; i++)
1314 static int format_rec_atype(struct lens *l, char **buf, uint indent) {
1317 if (l->rec_internal) {
1318 *buf = strdup("<<rec>>");
1319 return (*buf == NULL) ? -1 : 0;
1323 r = format_atype(l->body, &c, indent);
1326 r = xasprintf(buf, "<<rec:%s>>", c);
1328 return (r < 0) ? -1 : 0;
1331 static int format_atype(struct lens *l, char **buf, uint indent) {
1343 return (*buf == NULL) ? -1 : 0;
1346 return format_subtree_atype(l, buf, indent);
1349 return format_rep_atype(l, buf, indent, '*');
1352 return format_rep_atype(l, buf, indent, '?');
1355 return format_concat_atype(l, buf, indent);
1358 return format_union_atype(l, buf, indent);
1361 return format_rec_atype(l, buf, indent);
1364 return format_concat_atype(l->child, buf, indent);
1373 int lns_format_atype(struct lens *l, char **buf) {
1375 r = format_atype(l, buf, 4);
1382 struct value *lns_make_rec(struct info *info) {
1383 struct lens *l = make_lens(L_REC, info);
1385 l->rec_internal = 1;
1387 return make_lens_value(l);
1390 /* Transform a recursive lens into a recursive transition network
1392 * First, we transform the lens into context free grammar, considering any
1393 * nonrecursive lens as a terminal
1395 * cfg: lens -> nonterminal -> production list
1397 * cfg(primitive, N) -> N := regexp(primitive)
1398 * cfg(l1 . l2, N) -> N := N1 . N2 + cfg(l1, N1) + cfg(l2, N2)
1399 * cfg(l1 | l2, N) -> N := N1 | N2 + cfg(l1, N1) + cfg(l2, N2)
1400 * cfg(l*, N) -> N := N . N' | eps + cfg(l, N')
1401 * cfg([ l ], N) -> N := N' + cfg(l, N')
1403 * We use the lenses as nonterminals themselves; this also means that our
1404 * productions are normalized such that the RHS is either a terminal
1405 * (regexp) or entirely consists of nonterminals
1407 * In a few places, we need to know that a nonterminal corresponds to a
1408 * subtree combinator ([ l ]); this is the main reason that the rule (cfg[
1409 * l ], N) introduces a useless production N := N'.
1411 * Computing the types for a recursive lens r is (fairly) straightforward,
1412 * given the above grammar, which we convert to an automaton following
1413 * http://arxiv.org/abs/cs/9910022; the only complication arises from the
1414 * subtree combinator, since it can be used in recursive lenses to
1415 * construct trees of arbitrary depth, but we need to approximate the types
1416 * of r in a way that fits with our top-down tree automaton in put.c.
1418 * To handle subtree combinators, remember that the type rules for a lens
1424 * m.atype = enc(l.ktype, l.vtype)
1425 * ( enc is a function regexp -> regexp -> regexp)
1427 * We compute types for r by modifying its automaton according to
1428 * Nederhof's paper and reducing it to a regular expression of lenses. This
1429 * has to happen in the following steps:
1430 * r.ktype : approximate by using [ .. ].ktype = NULL
1431 * r.vtype : same as r.ktype
1432 * r.ctype : approximate by treating [ l ] as l
1433 * r.atype : approximate by using r.ktype and r.vtype from above
1434 * in lens expressions [ f(r) ]
1437 /* Transitions go to a state and are labeled with a lens. For epsilon
1438 * transitions, lens may be NULL. When lens is a simple (nonrecursive
1439 * lens), PROD will be NULL. When we modify the automaton to splice
1440 * nonterminals in, we remember the production for the nonterminal in PROD.
1449 struct state *next; /* Linked list for memory management */
1451 struct trans *trans;
1454 /* Productions for lens LENS. Start state START and end state END. If we
1455 start with START, END is the only accepting state. */
1458 struct state *start;
1462 /* A recursive transition network used to compute regular approximations
1468 struct state *states; /* Linked list through next of all states in all
1469 prods; the states for each production are on
1470 the part of the list from prod->start to
1473 enum lens_type lens_type;
1474 unsigned int check : 1;
1477 #define RTN_BAIL(rtn) if ((rtn)->exn != NULL || \
1478 (rtn)->info->error->code != AUG_NOERROR) \
1481 static void free_prod(struct prod *prod) {
1484 unref(prod->lens, lens);
1488 static void free_rtn(struct rtn *rtn) {
1491 for (int i=0; i < rtn->nprod; i++)
1492 free_prod(rtn->prod[i]);
1494 list_for_each(s, rtn->states) {
1495 for (int i=0; i < s->ntrans; i++) {
1496 unref(s->trans[i].lens, lens);
1497 unref(s->trans[i].re, regexp);
1501 list_free(rtn->states);
1502 unref(rtn->info, info);
1503 unref(rtn->exn, value);
1507 static struct state *add_state(struct prod *prod) {
1508 struct state *result = NULL;
1512 ERR_NOMEM(r < 0, prod->lens->info);
1514 list_cons(prod->start->next, result);
1519 static struct trans *add_trans(struct rtn *rtn, struct state *state,
1520 struct state *to, struct lens *l) {
1522 struct trans *result = NULL;
1524 for (int i=0; i < state->ntrans; i++)
1525 if (state->trans[i].to == to && state->trans[i].lens == l)
1526 return state->trans + i;
1528 r = REALLOC_N(state->trans, state->ntrans+1);
1529 ERR_NOMEM(r < 0, rtn->info);
1531 result = state->trans + state->ntrans;
1537 result->lens = ref(l);
1538 result->re = ref(ltype(l, rtn->lens_type));
1544 static struct prod *make_prod(struct rtn *rtn, struct lens *l) {
1545 struct prod *result = NULL;
1549 ERR_NOMEM(r < 0, l->info);
1551 result->lens = ref(l);
1552 r = ALLOC(result->start);
1553 ERR_NOMEM(r < 0, l->info);
1555 result->end = add_state(result);
1558 result->end->next = rtn->states;
1559 rtn->states = result->start;
1567 static struct prod *prod_for_lens(struct rtn *rtn, struct lens *l) {
1570 for (int i=0; i < rtn->nprod; i++) {
1571 if (rtn->prod[i]->lens == l)
1572 return rtn->prod[i];
1577 static void rtn_dot(struct rtn *rtn, const char *stage) {
1581 fp = debug_fopen("rtn_%s_%s.dot", stage, lens_type_names[rtn->lens_type]);
1585 fprintf(fp, "digraph \"l1\" {\n rankdir=LR;\n");
1586 list_for_each(s, rtn->states) {
1588 for (int p=0; p < rtn->nprod; p++) {
1589 if (s == rtn->prod[p]->start) {
1590 r = xasprintf(&label, "s%d", p);
1591 } else if (s == rtn->prod[p]->end) {
1592 r = xasprintf(&label, "e%d", p);
1594 ERR_NOMEM(r < 0, rtn->info);
1596 if (label == NULL) {
1597 r = xasprintf(&label, "%p", s);
1598 ERR_NOMEM(r < 0, rtn->info);
1600 fprintf(fp, " n%p [label = \"%s\"];\n", s, label == NULL ? "" : label);
1602 for (int i=0; i < s->ntrans; i++) {
1603 fprintf(fp, " n%p -> n%p", s, s->trans[i].to);
1604 if (s->trans[i].re != NULL) {
1605 label = regexp_escape(s->trans[i].re);
1606 for (char *t = label; *t; t++)
1609 fprintf(fp, " [ label = \"%s\" ]", label);
1620 /* Add transitions to RTN corresponding to cfg(l, N) */
1621 static void rtn_rules(struct rtn *rtn, struct lens *l) {
1625 struct prod *prod = prod_for_lens(rtn, l);
1629 int r = REALLOC_N(rtn->prod, rtn->nprod+1);
1630 ERR_NOMEM(r < 0, l->info);
1632 prod = make_prod(rtn, l);
1633 rtn->prod[rtn->nprod] = prod;
1637 struct state *start = prod->start;
1641 /* cfg(l1|..|ln, N) -> N := N1 | N2 | ... | Nn */
1642 for (int i=0; i < l->nchildren; i++) {
1643 add_trans(rtn, start, prod->end, l->children[i]);
1645 rtn_rules(rtn, l->children[i]);
1650 /* cfg(l1 . l2 ... ln, N) -> N := N1 . N2 ... Nn */
1651 for (int i=0; i < l->nchildren-1; i++) {
1652 struct state *s = add_state(prod);
1654 add_trans(rtn, start, s, l->children[i]);
1657 rtn_rules(rtn, l->children[i]);
1661 struct lens *c = l->children[l->nchildren - 1];
1662 add_trans(rtn, start, prod->end, c);
1669 /* cfg(l*, N) -> N := N . N' | eps */
1670 struct state *s = add_state(prod);
1672 add_trans(rtn, start, s, l);
1674 add_trans(rtn, s, prod->end, l->child);
1676 add_trans(rtn, start, prod->end, NULL);
1678 rtn_rules(rtn, l->child);
1683 switch (rtn->lens_type) {
1686 /* cfg([ l ], N) -> N := eps */
1687 add_trans(rtn, start, prod->end, NULL);
1690 /* cfg([ l ], N) -> N := N' plus cfg(l, N') */
1691 add_trans(rtn, start, prod->end, l->child);
1693 rtn_rules(rtn, l->child);
1697 /* At this point, we have propagated ktype and vtype */
1698 /* cfg([ l ], N) -> N := enc(l->ktype, l->vtype) */
1699 struct trans *t = add_trans(rtn, start, prod->end, NULL);
1701 t->re = subtree_atype(l->info, l->child->ktype, l->child->vtype);
1705 BUG_ON(true, rtn->info, "Unexpected lens type %d", rtn->lens_type);
1710 /* cfg(l?, N) -> N := N' | eps plus cfg(l, N') */
1711 add_trans(rtn, start, prod->end, l->child);
1713 add_trans(rtn, start, prod->end, NULL);
1715 rtn_rules(rtn, l->child);
1719 /* cfg(l, N) -> N := N' plus cfg(l->body, N') */
1720 add_trans(rtn, start, prod->end, l->body);
1722 rtn_rules(rtn, l->body);
1726 add_trans(rtn, start, prod->end, l->child);
1737 /* Replace transition t with two epsilon transitions s => p->start and
1738 * p->end => s->trans[i].to where s is the start of t. Instead of adding
1739 * epsilon transitions, we expand the epsilon transitions.
1741 static void prod_splice(struct rtn *rtn,
1742 struct prod *from, struct prod *to, struct trans *t) {
1744 add_trans(rtn, to->end, t->to, NULL);
1745 ERR_BAIL(from->lens->info);
1747 unref(t->re, regexp);
1753 static void rtn_splice(struct rtn *rtn, struct prod *prod) {
1754 for (struct state *s = prod->start; s != prod->end; s = s->next) {
1755 for (int i=0; i < s->ntrans; i++) {
1756 struct prod *p = prod_for_lens(rtn, s->trans[i].lens);
1758 prod_splice(rtn, prod, p, s->trans+i);
1767 static struct rtn *rtn_build(struct lens *rec, enum lens_type lt) {
1772 ERR_NOMEM(r < 0, rec->info);
1774 rtn->info = ref(rec->info);
1775 rtn->lens_type = lt;
1777 rtn_rules(rtn, rec);
1779 if (debugging("cf.approx"))
1780 rtn_dot(rtn, "10-rules");
1782 for (int i=0; i < rtn->nprod; i++) {
1783 rtn_splice(rtn, rtn->prod[i]);
1786 if (debugging("cf.approx"))
1787 rtn_dot(rtn, "11-splice");
1793 /* Compare transitions lexicographically by (to, lens) */
1794 static int trans_to_cmp(const void *v1, const void *v2) {
1795 const struct trans *t1 = v1;
1796 const struct trans *t2 = v2;
1798 if (t1->to != t2->to)
1799 return (t1->to < t2->to) ? -1 : 1;
1801 if (t1->lens == t2->lens)
1803 return (t1->lens < t2->lens) ? -1 : 1;
1806 /* Collapse a transition S1 -> S -> S2 by adding a transition S1 -> S2 with
1807 * lens R1 . (LOOP)* . R2 | R3 where R3 is the regexp on the possibly
1808 * existing transition S1 -> S2. If LOOP is NULL or R3 does not exist,
1809 * label the transition with a simplified regexp by treating NULL as
1811 static void collapse_trans(struct rtn *rtn,
1812 struct state *s1, struct state *s2,
1813 struct regexp *r1, struct regexp *loop,
1814 struct regexp *r2) {
1816 struct trans *t = NULL;
1817 struct regexp *r = NULL;
1819 for (int i=0; i < s1->ntrans; i++) {
1820 if (s1->trans[i].to == s2) {
1826 /* Set R = R1 . (LOOP)* . R2, treating NULL's as epsilon */
1830 else if (r2 == NULL)
1833 r = regexp_concat(rtn->info, r1, r2);
1835 struct regexp *s = regexp_iter(rtn->info, loop, 0, -1);
1836 ERR_NOMEM(s == NULL, rtn->info);
1837 struct regexp *c = NULL;
1842 c = regexp_concat(rtn->info, r1, s);
1844 ERR_NOMEM(c == NULL, rtn->info);
1850 r = regexp_concat(rtn->info, c, r2);
1852 ERR_NOMEM(r == NULL, rtn->info);
1857 t = add_trans(rtn, s1, s2, NULL);
1858 ERR_NOMEM(t == NULL, rtn->info);
1860 } else if (t->re == NULL) {
1861 if (r == NULL || regexp_matches_empty(r))
1864 t->re = regexp_maybe(rtn->info, r);
1866 ERR_NOMEM(t->re == NULL, rtn->info);
1868 } else if (r == NULL) {
1869 if (!regexp_matches_empty(t->re)) {
1870 r = regexp_maybe(rtn->info, t->re);
1871 unref(t->re, regexp);
1873 ERR_NOMEM(r == NULL, rtn->info);
1876 struct regexp *u = regexp_union(rtn->info, r, t->re);
1878 unref(t->re, regexp);
1880 ERR_NOMEM(u == NULL, rtn->info);
1885 rtn->exn = rtn->info->error->exn;
1889 /* Reduce the automaton with start state rprod->start and only accepting
1890 * state rprod->end so that we have a single transition rprod->start =>
1891 * rprod->end labelled with the overall approximating regexp for the
1894 * This is the same algorithm as fa_as_regexp in fa.c
1896 static struct regexp *rtn_reduce(struct rtn *rtn, struct lens *rec) {
1897 struct prod *prod = prod_for_lens(rtn, rec);
1900 ERR_THROW(prod == NULL, rtn->info, AUG_EINTERNAL,
1901 "No production for recursive lens");
1903 /* Eliminate epsilon transitions and turn transitions between the same
1904 * two states into a regexp union */
1905 list_for_each(s, rtn->states) {
1906 qsort(s->trans, s->ntrans, sizeof(*s->trans), trans_to_cmp);
1907 for (int i=0; i < s->ntrans; i++) {
1909 for (;j < s->ntrans && s->trans[i].to == s->trans[j].to;
1912 struct regexp *u, **v;
1913 r = ALLOC_N(v, j - i);
1914 ERR_NOMEM(r < 0, rtn->info);
1915 for (int k=i; k < j; k++)
1916 v[k-i] = s->trans[k].re;
1917 u = regexp_union_n(rtn->info, j - i, v);
1919 // FIXME: The calling convention for regexp_union_n
1920 // is bad, since we can't distinguish between alloc
1921 // failure and unioning all NULL's
1922 for (int k=0; k < j-i; k++)
1925 ERR_NOMEM(true, rtn->info);
1929 for (int k=i; k < j; k++) {
1930 unref(s->trans[k].lens, lens);
1931 unref(s->trans[k].re, regexp);
1934 MEMMOVE(s->trans + (i+1),
1937 s->ntrans -= j - (i + 1);
1942 /* Introduce new start and end states with epsilon transitions to/from
1943 * the old start and end states */
1944 struct state *end = NULL;
1945 struct state *start = NULL;
1946 if (ALLOC(start) < 0 || ALLOC(end) < 0) {
1949 ERR_NOMEM(true, rtn->info);
1951 list_insert_before(start, prod->start, rtn->states);
1952 end->next = prod->end->next;
1953 prod->end->next = end;
1955 add_trans(rtn, start, prod->start, NULL);
1957 add_trans(rtn, prod->end, end, NULL);
1960 prod->start = start;
1963 /* Eliminate states S (except for INI and FIN) one by one:
1964 * Let LOOP the regexp for the transition S -> S if it exists, epsilon
1966 * For all S1, S2 different from S with S1 -> S -> S2
1967 * Let R1 the regexp of S1 -> S
1968 * R2 the regexp of S -> S2
1969 * R3 the regexp of S1 -> S2 (or the regexp matching nothing
1970 * if no such transition)
1971 * set the regexp on the transition S1 -> S2 to
1972 * R1 . (LOOP)* . R2 | R3 */
1973 // FIXME: This does not go over all states
1974 list_for_each(s, rtn->states) {
1975 if (s == prod->end || s == prod->start)
1977 struct regexp *loop = NULL;
1978 for (int i=0; i < s->ntrans; i++) {
1979 if (s == s->trans[i].to) {
1980 ensure(loop == NULL, rtn->info);
1981 loop = s->trans[i].re;
1984 list_for_each(s1, rtn->states) {
1987 for (int t1=0; t1 < s1->ntrans; t1++) {
1988 if (s == s1->trans[t1].to) {
1989 for (int t2=0; t2 < s->ntrans; t2++) {
1990 struct state *s2 = s->trans[t2].to;
1993 collapse_trans(rtn, s1, s2,
1994 s1->trans[t1].re, loop,
2003 /* Find the overall regexp */
2004 struct regexp *result = NULL;
2005 for (int i=0; i < prod->start->ntrans; i++) {
2006 if (prod->start->trans[i].to == prod->end) {
2007 ensure(result == NULL, rtn->info);
2008 result = ref(prod->start->trans[i].re);
2016 static void propagate_type(struct lens *l, enum lens_type lt) {
2017 struct regexp **types = NULL;
2020 if (! l->recursive || ltype(l, lt) != NULL)
2025 r = ALLOC_N(types, l->nchildren);
2026 ERR_NOMEM(r < 0, l->info);
2027 for (int i=0; i < l->nchildren; i++) {
2028 propagate_type(l->children[i], lt);
2029 types[i] = ltype(l->children[i], lt);
2031 ltype(l, lt) = regexp_concat_n(l->info, l->nchildren, types);
2035 r = ALLOC_N(types, l->nchildren);
2036 ERR_NOMEM(r < 0, l->info);
2037 for (int i=0; i < l->nchildren; i++) {
2038 propagate_type(l->children[i], lt);
2039 types[i] = ltype(l->children[i], lt);
2041 ltype(l, lt) = regexp_union_n(l->info, l->nchildren, types);
2045 propagate_type(l->child, lt);
2047 l->atype = subtree_atype(l->info, l->child->ktype, l->child->vtype);
2049 l->ctype = ref(l->child->ctype);
2052 propagate_type(l->child, lt);
2053 ltype(l, lt) = regexp_iter(l->info, ltype(l->child, lt), 0, -1);
2056 propagate_type(l->child, lt);
2057 ltype(l, lt) = regexp_maybe(l->info, ltype(l->child, lt));
2063 propagate_type(l->child, lt);
2064 ltype(l, lt) = ref(ltype(l->child, lt));
2075 static struct value *typecheck(struct lens *l, int check);
2077 typedef struct value *typecheck_n_make(struct info *,
2078 struct lens *, struct lens *, int);
2080 static struct info *merge_info(struct info *i1, struct info *i2) {
2083 ERR_NOMEM(info == NULL, i1);
2085 info->filename = ref(i1->filename);
2086 info->first_line = i1->first_line;
2087 info->first_column = i1->first_column;
2088 info->last_line = i2->last_line;
2089 info->last_column = i2->last_column;
2090 info->error = i1->error;
2098 static struct value *typecheck_n(struct lens *l,
2099 typecheck_n_make *make, int check) {
2100 struct value *exn = NULL;
2101 struct lens *acc = NULL;
2103 ensure(l->tag == L_CONCAT || l->tag == L_UNION, l->info);
2104 for (int i=0; i < l->nchildren; i++) {
2105 exn = typecheck(l->children[i], check);
2109 acc = ref(l->children[0]);
2110 for (int i=1; i < l->nchildren; i++) {
2111 struct info *info = merge_info(acc->info, l->children[i]->info);
2112 ERR_BAIL(acc->info);
2113 exn = (*make)(info, acc, ref(l->children[i]), check);
2116 ensure(exn->tag == V_LENS, l->info);
2117 acc = ref(exn->lens);
2120 l->value = acc->value;
2127 static struct value *typecheck(struct lens *l, int check) {
2128 struct value *exn = NULL;
2130 /* Nonrecursive lenses are typechecked at build time */
2136 exn = typecheck_n(l, lns_make_concat, check);
2139 exn = typecheck_n(l, lns_make_union, check);
2143 exn = typecheck(l->child, check);
2147 exn = typecheck_iter(l->info, l->child);
2148 if (exn == NULL && l->value)
2149 exn = make_exn_value(l->info, "Multiple stores in iteration");
2150 if (exn == NULL && l->key)
2151 exn = make_exn_value(l->info, "Multiple keys/labels in iteration");
2155 exn = typecheck_maybe(l->info, l->child);
2156 l->key = l->child->key;
2157 l->value = l->child->value;
2170 static struct value *rtn_approx(struct lens *rec, enum lens_type lt) {
2171 struct rtn *rtn = NULL;
2172 struct value *result = NULL;
2174 rtn = rtn_build(rec, lt);
2176 ltype(rec, lt) = rtn_reduce(rtn, rec);
2178 if (debugging("cf.approx"))
2179 rtn_dot(rtn, "50-reduce");
2181 propagate_type(rec->body, lt);
2182 ERR_BAIL(rec->info);
2187 if (debugging("cf.approx")) {
2188 printf("approx %s => ", lens_type_names[lt]);
2189 print_regexp(stdout, ltype(rec, lt));
2195 if (rtn->exn == NULL)
2196 result = rec->info->error->exn;
2198 result = ref(rtn->exn);
2202 static struct value *
2203 exn_multiple_epsilons(struct lens *lens,
2204 struct lens *l1, struct lens *l2) {
2206 struct value *exn = NULL;
2208 exn = make_exn_value(ref(lens->info),
2209 "more than one nullable branch in a union");
2210 fi = format_info(l1->info);
2211 exn_printf_line(exn, "First nullable lens: %s", fi);
2214 fi = format_info(l2->info);
2215 exn_printf_line(exn, "Second nullable lens: %s", fi);
2221 /* Update lens->ctype_nullable and return 1 if there was a change,
2222 * 0 if there was none */
2223 static int ctype_nullable(struct lens *lens, struct value **exn) {
2226 struct lens *null_lens = NULL;
2228 if (! lens->recursive)
2234 for (int i=0; i < lens->nchildren; i++) {
2235 if (ctype_nullable(lens->children[i], exn))
2237 if (! lens->children[i]->ctype_nullable)
2242 for (int i=0; i < lens->nchildren; i++) {
2243 if (ctype_nullable(lens->children[i], exn))
2245 if (lens->children[i]->ctype_nullable) {
2247 *exn = exn_multiple_epsilons(lens, null_lens,
2252 null_lens = lens->children[i];
2258 ret = ctype_nullable(lens->child, exn);
2259 nullable = lens->child->ctype_nullable;
2266 nullable = lens->body->ctype_nullable;
2274 if (nullable != lens->ctype_nullable) {
2276 lens->ctype_nullable = nullable;
2281 struct value *lns_check_rec(struct info *info,
2282 struct lens *body, struct lens *rec,
2284 /* The types in the order of approximation */
2285 static const enum lens_type types[] = { KTYPE, VTYPE, ATYPE };
2286 struct value *result = NULL;
2288 ensure(rec->tag == L_REC, info);
2289 ensure(rec->rec_internal, info);
2291 /* The user might have written down a regular lens with 'let rec' */
2292 if (! body->recursive) {
2293 result = make_lens_value(ref(body));
2294 ERR_NOMEM(result == NULL, info);
2298 /* To help memory management, we avoid the cycle inherent ina recursive
2299 * lens by using two instances of an L_REC lens. One is marked with
2300 * rec_internal, and used inside the body of the lens. The other is the
2301 * "toplevel" which receives external references.
2303 * The internal instance of the recursive lens is REC, the external one
2304 * is TOP, constructed below
2306 rec->body = body; /* REC does not own BODY */
2308 for (int i=0; i < ARRAY_CARDINALITY(types); i++) {
2309 result = rtn_approx(rec, types[i]);
2313 if (rec->atype == NULL) {
2314 result = make_exn_value(ref(rec->info),
2315 "recursive lens generates the empty language for its %s",
2316 rec->ctype == NULL ? "ctype" : "atype");
2320 rec->key = rec->body->key;
2321 rec->value = rec->body->value;
2322 rec->consumes_value = rec->body->consumes_value;
2324 while(ctype_nullable(rec->body, &result));
2327 rec->ctype_nullable = rec->body->ctype_nullable;
2329 result = typecheck(rec->body, check);
2333 result = lns_make_rec(ref(rec->info));
2334 struct lens *top = result->lens;
2335 for (int t=0; t < ntypes; t++)
2336 ltype(top, t) = ref(ltype(rec, t));
2337 top->value = rec->value;
2338 top->key = rec->key;
2339 top->consumes_value = rec->consumes_value;
2340 top->ctype_nullable = rec->ctype_nullable;
2341 top->body = ref(body);
2343 top->rec_internal = 0;
2346 top->jmt = jmt_build(top);
2351 if (result != NULL && result->tag != V_EXN)
2352 unref(result, value);
2354 result = info->error->exn;
2359 void dump_lens_tree(struct lens *lens){
2360 static int count = 0;
2363 fp = debug_fopen("lens_%02d_%s.dot", count++, ltag(lens));
2367 fprintf(fp, "digraph \"%s\" {\n", "lens");
2368 dump_lens(fp, lens);
2374 void dump_lens(FILE *out, struct lens *lens){
2378 fprintf(out, "\"%p\" [ shape = box, label = \"%s\\n", lens, ltag(lens));
2380 for (int t=0; t < ntypes; t++) {
2381 re = ltype(lens, t);
2384 fprintf(out, "%s=",lens_type_names[t]);
2385 print_regexp(out, re);
2386 fprintf(out, "\\n");
2389 fprintf(out, "recursive=%x\\n", lens->recursive);
2390 fprintf(out, "rec_internal=%x\\n", lens->rec_internal);
2391 fprintf(out, "consumes_value=%x\\n", lens->consumes_value);
2392 fprintf(out, "ctype_nullable=%x\\n", lens->ctype_nullable);
2393 fprintf(out, "\"];\n");
2410 for(i = 0; i<lens->nchildren;i++){
2411 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->children[i]);
2412 dump_lens(out, lens->children[i]);
2416 for(i = 0; i<lens->nchildren;i++){
2417 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->children[i]);
2418 dump_lens(out, lens->children[i]);
2422 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2423 dump_lens(out, lens->child);
2426 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2427 dump_lens(out, lens->child);
2431 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2432 dump_lens(out, lens->child);
2436 if (lens->rec_internal == 0){
2437 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2438 dump_lens(out, lens->body);
2442 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2443 dump_lens(out, lens->child);
2446 fprintf(out, "ERROR\n");
2454 * indent-tabs-mode: nil