4 * Copyright (C) 2007-2016 David Lutterkort
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * Author: David Lutterkort <dlutter@redhat.com>
31 /* This enum must be kept in sync with type_offs and ntypes */
33 CTYPE, ATYPE, KTYPE, VTYPE
36 static const int const type_offs[] = {
37 offsetof(struct lens, ctype),
38 offsetof(struct lens, atype),
39 offsetof(struct lens, ktype),
40 offsetof(struct lens, vtype)
42 static const int ntypes = sizeof(type_offs)/sizeof(type_offs[0]);
44 static const char *lens_type_names[] =
45 { "ctype", "atype", "ktype", "vtype" };
47 #define ltype(lns, t) *((struct regexp **) ((char *) lns + type_offs[t]))
49 static struct value * typecheck_union(struct info *,
50 struct lens *l1, struct lens *l2);
51 static struct value *typecheck_concat(struct info *,
52 struct lens *l1, struct lens *l2);
53 static struct value *typecheck_square(struct info *,
54 struct lens *l1, struct lens *l2);
55 static struct value *typecheck_iter(struct info *info, struct lens *l);
56 static struct value *typecheck_maybe(struct info *info, struct lens *l);
58 /* Lens names for pretty printing */
59 /* keep order in sync with enum type */
60 static const char *const tags[] = {
61 "del", "store", "value", "key", "label", "seq", "counter",
63 "subtree", "star", "maybe", "rec", "square"
66 #define ltag(lens) (tags[lens->tag - L_DEL])
68 static const struct string digits_string = {
69 .ref = REF_MAX, .str = (char *) "[0123456789]+"
71 static const struct string *const digits_pat = &digits_string;
73 char *format_lens(struct lens *l) {
75 return strdup("(no lens)");
78 char *inf = format_info(l->info);
81 xasprintf(&result, "%s[%s]%s", tags[l->tag - L_DEL], inf,
82 l->recursive ? "R" : "r");
87 #define BUG_LENS_TAG(lns) bug_lens_tag(lns, __FILE__, __LINE__)
89 static void bug_lens_tag(struct lens *lens, const char *file, int lineno) {
90 if (lens != NULL && lens->info != NULL && lens->info->error != NULL) {
91 char *s = format_lens(lens);
92 bug_on(lens->info->error, file, lineno, "Unexpected lens tag %s", s);
95 /* We are really screwed */
101 /* Construct a finite automaton from REGEXP and return it in *FA.
103 * Return NULL if REGEXP is valid, if the regexp REGEXP has syntax errors,
104 * return an exception.
106 static struct value *str_to_fa(struct info *info, const char *pattern,
107 struct fa **fa, int nocase) {
109 struct value *exn = NULL;
111 char *re_str = NULL, *re_err = NULL;
114 error = fa_compile(pattern, strlen(pattern), fa);
115 if (error == REG_NOERROR) {
117 error = fa_nocase(*fa);
118 ERR_NOMEM(error < 0, info);
123 re_str = escape(pattern, -1, RX_ESCAPES);
124 ERR_NOMEM(re_str == NULL, info);
126 exn = make_exn_value(info, "Invalid regular expression /%s/", re_str);
128 re_err_len = regerror(error, NULL, NULL, 0);
129 error = ALLOC_N(re_err, re_err_len);
130 ERR_NOMEM(error < 0, info);
132 regerror(error, NULL, re_err, re_err_len);
133 exn_printf_line(exn, "%s", re_err);
142 exn = info->error->exn;
146 static struct value *regexp_to_fa(struct regexp *regexp, struct fa **fa) {
147 return str_to_fa(regexp->info, regexp->pattern->str, fa, regexp->nocase);
150 static struct lens *make_lens(enum lens_tag tag, struct info *info) {
159 static struct lens *make_lens_unop(enum lens_tag tag, struct info *info,
160 struct lens *child) {
161 struct lens *lens = make_lens(tag, info);
163 lens->value = child->value;
164 lens->key = child->key;
168 typedef struct regexp *regexp_combinator(struct info *, int, struct regexp **);
170 static struct lens *make_lens_binop(enum lens_tag tag, struct info *info,
171 struct lens *l1, struct lens *l2,
172 regexp_combinator *combinator) {
173 struct lens *lens = make_lens(tag, info);
174 int n1 = (l1->tag == tag) ? l1->nchildren : 1;
175 struct regexp **types = NULL;
180 lens->nchildren = n1;
181 lens->nchildren += (l2->tag == tag) ? l2->nchildren : 1;
183 lens->recursive = l1->recursive || l2->recursive;
184 lens->rec_internal = l1->rec_internal || l2->rec_internal;
186 if (ALLOC_N(lens->children, lens->nchildren) < 0) {
191 if (l1->tag == tag) {
192 for (int i=0; i < l1->nchildren; i++)
193 lens->children[i] = ref(l1->children[i]);
196 lens->children[0] = l1;
199 if (l2->tag == tag) {
200 for (int i=0; i < l2->nchildren; i++)
201 lens->children[n1 + i] = ref(l2->children[i]);
204 lens->children[n1] = l2;
207 for (int i=0; i < lens->nchildren; i++) {
208 lens->value = lens->value || lens->children[i]->value;
209 lens->key = lens->key || lens->children[i]->key;
212 if (ALLOC_N(types, lens->nchildren) < 0)
215 if (! lens->rec_internal) {
216 /* Inside a recursive lens, we assign types with lns_check_rec
217 * once we know the entire lens */
218 for (int t=0; t < ntypes; t++) {
219 if (lens->recursive && t == CTYPE)
221 for (int i=0; i < lens->nchildren; i++)
222 types[i] = ltype(lens->children[i], t);
223 ltype(lens, t) = (*combinator)(info, lens->nchildren, types);
228 for (int i=0; i < lens->nchildren; i++)
229 ensure(tag != lens->children[i]->tag, lens->info);
238 static struct value *make_lens_value(struct lens *lens) {
240 v = make_value(V_LENS, ref(lens->info));
245 struct value *lns_make_union(struct info *info,
246 struct lens *l1, struct lens *l2, int check) {
247 struct lens *lens = NULL;
248 int consumes_value = l1->consumes_value && l2->consumes_value;
249 int recursive = l1->recursive || l2->recursive;
250 int ctype_nullable = l1->ctype_nullable || l2->ctype_nullable;
253 struct value *exn = typecheck_union(info, l1, l2);
258 lens = make_lens_binop(L_UNION, info, l1, l2, regexp_union_n);
259 lens->consumes_value = consumes_value;
261 lens->ctype_nullable = ctype_nullable;
262 return make_lens_value(lens);
265 struct value *lns_make_concat(struct info *info,
266 struct lens *l1, struct lens *l2, int check) {
267 struct lens *lens = NULL;
268 int consumes_value = l1->consumes_value || l2->consumes_value;
269 int recursive = l1->recursive || l2->recursive;
270 int ctype_nullable = l1->ctype_nullable && l2->ctype_nullable;
273 struct value *exn = typecheck_concat(info, l1, l2);
277 if (l1->value && l2->value) {
278 return make_exn_value(info, "Multiple stores in concat");
280 if (l1->key && l2->key) {
281 return make_exn_value(info, "Multiple keys/labels in concat");
284 lens = make_lens_binop(L_CONCAT, info, l1, l2, regexp_concat_n);
285 lens->consumes_value = consumes_value;
287 lens->ctype_nullable = ctype_nullable;
288 return make_lens_value(lens);
291 static struct regexp *subtree_atype(struct info *info,
292 struct regexp *ktype,
293 struct regexp *vtype) {
294 const char *kpat = (ktype == NULL) ? ENC_NULL : ktype->pattern->str;
295 const char *vpat = (vtype == NULL) ? ENC_NULL : vtype->pattern->str;
297 struct regexp *result = NULL;
298 char *ks = NULL, *vs = NULL;
301 if (ktype != NULL && vtype != NULL && ktype->nocase != vtype->nocase) {
302 ks = regexp_expand_nocase(ktype);
303 vs = regexp_expand_nocase(vtype);
304 ERR_NOMEM(ks == NULL || vs == NULL, info);
305 if (asprintf(&pat, "(%s)%s(%s)%s", ks, ENC_EQ, vs, ENC_SLASH) < 0)
306 ERR_NOMEM(true, info);
309 if (asprintf(&pat, "(%s)%s(%s)%s", kpat, ENC_EQ, vpat, ENC_SLASH) < 0)
310 ERR_NOMEM(pat == NULL, info);
314 nocase = ktype->nocase;
315 else if (vtype != NULL)
316 nocase = vtype->nocase;
318 result = make_regexp(info, pat, nocase);
326 * A subtree lens l1 = [ l ]
328 * Types are assigned as follows:
330 * l1->ctype = l->ctype
331 * l1->atype = encode(l->ktype, l->vtype)
335 struct value *lns_make_subtree(struct info *info, struct lens *l) {
338 lens = make_lens_unop(L_SUBTREE, info, l);
339 lens->ctype = ref(l->ctype);
341 lens->atype = subtree_atype(info, l->ktype, l->vtype);
342 lens->value = lens->key = 0;
343 lens->recursive = l->recursive;
344 lens->rec_internal = l->rec_internal;
346 lens->ctype_nullable = l->ctype_nullable;
347 return make_lens_value(lens);
350 struct value *lns_make_star(struct info *info, struct lens *l, int check) {
354 struct value *exn = typecheck_iter(info, l);
359 return make_exn_value(info, "Multiple stores in iteration");
362 return make_exn_value(info, "Multiple keys/labels in iteration");
365 lens = make_lens_unop(L_STAR, info, l);
366 for (int t = 0; t < ntypes; t++) {
367 ltype(lens, t) = regexp_iter(info, ltype(l, t), 0, -1);
369 lens->recursive = l->recursive;
370 lens->rec_internal = l->rec_internal;
371 lens->ctype_nullable = 1;
372 return make_lens_value(lens);
375 struct value *lns_make_plus(struct info *info, struct lens *l, int check) {
376 struct value *star, *conc;
378 star = lns_make_star(info, l, check);
382 conc = lns_make_concat(ref(info), ref(l), ref(star->lens), check);
387 struct value *lns_make_maybe(struct info *info, struct lens *l, int check) {
391 struct value *exn = typecheck_maybe(info, l);
395 lens = make_lens_unop(L_MAYBE, info, l);
396 for (int t=0; t < ntypes; t++)
397 ltype(lens, t) = regexp_maybe(info, ltype(l, t));
398 lens->value = l->value;
400 lens->recursive = l->recursive;
401 lens->rec_internal = l->rec_internal;
402 lens->ctype_nullable = 1;
403 return make_lens_value(lens);
406 /* The ctype of SQR is a regular approximation of the true ctype of SQR
407 * at this point. In some situations, for example in processing quoted
408 * strings this leads to false typecheck errors; to lower the chances
409 * of these, we try to construct the precise ctype of SQR if the
410 * language of L1 is finite (and has a small number of words)
412 static void square_precise_type(struct info *info,
415 struct regexp *body) {
419 struct fa *fa = NULL;
420 struct value *exn = NULL;
421 struct regexp **u = NULL, *c[3], *w = NULL;
423 exn = str_to_fa(info, left->pattern->str, &fa, left->nocase);
427 nwords = fa_enumerate(fa, 10, &words); /* The limit of 10 is arbitrary */
431 r = ALLOC_N(u, nwords);
432 ERR_NOMEM(r < 0, info);
435 for (int i=0; i < nwords; i++) {
436 w = make_regexp_literal(left->info, words[i]);
437 ERR_NOMEM(w == NULL, info);
438 w->nocase = left->nocase;
441 u[i] = regexp_concat_n(info, 3, c);
444 ERR_NOMEM(u[i] == NULL, info);
446 w = regexp_union_n(info, nwords, u);
455 for (int i=0; i < nwords; i++) {
466 /* Build a square lens as
467 * left . body . right
468 * where left and right accepts the same language and
469 * captured strings must match. The inability to express this with other
470 * lenses makes the square primitive necessary.
472 struct value * lns_make_square(struct info *info, struct lens *l1,
473 struct lens *l2, struct lens *l3, int check) {
474 struct value *cnt1 = NULL, *cnt2 = NULL, *res = NULL;
475 struct lens *sqr = NULL;
477 /* supported types: L_KEY . body . L_DEL or L_DEL . body . L_DEL */
478 if (l3->tag != L_DEL || (l1->tag != L_DEL && l1->tag != L_KEY))
479 return make_exn_value(info, "Supported types: (key lns del) or (del lns del)");
481 res = typecheck_square(info, l1, l3);
485 res = lns_make_concat(ref(info), ref(l1), ref(l2), check);
489 res = lns_make_concat(ref(info), ref(cnt1->lens), ref(l3), check);
494 sqr = make_lens_unop(L_SQUARE, ref(info), ref(cnt2->lens));
495 ERR_NOMEM(sqr == NULL, info);
497 for (int t=0; t < ntypes; t++)
498 ltype(sqr, t) = ref(ltype(cnt2->lens, t));
500 square_precise_type(info, &(sqr->ctype), l1->ctype, l2->ctype);
502 sqr->recursive = cnt2->lens->recursive;
503 sqr->rec_internal = cnt2->lens->rec_internal;
504 sqr->consumes_value = cnt2->lens->consumes_value;
506 res = make_lens_value(sqr);
507 ERR_NOMEM(res == NULL, info);
525 static struct regexp *make_regexp_from_string(struct info *info,
526 struct string *string) {
531 r->pattern = ref(string);
537 static struct regexp *restrict_regexp(struct regexp *r) {
539 struct regexp *result = NULL;
543 ret = fa_restrict_alphabet(r->pattern->str, strlen(r->pattern->str),
545 RESERVED_FROM, RESERVED_TO);
546 ERR_NOMEM(ret == REG_ESPACE || ret < 0, r->info);
547 BUG_ON(ret != 0, r->info, NULL);
548 ensure(nre_len == strlen(nre), r->info);
550 ret = regexp_c_locale(&nre, &nre_len);
551 ERR_NOMEM(ret < 0, r->info);
553 result = make_regexp(r->info, nre, r->nocase);
555 BUG_ON(regexp_compile(result) != 0, r->info,
556 "Could not compile restricted regexp");
561 unref(result, regexp);
565 struct value *lns_make_prim(enum lens_tag tag, struct info *info,
566 struct regexp *regexp, struct string *string) {
567 struct lens *lens = NULL;
568 struct value *exn = NULL;
569 struct fa *fa_slash = NULL;
570 struct fa *fa_key = NULL;
571 struct fa *fa_isect = NULL;
575 exn = str_to_fa(info, "(.|\n)*/(.|\n)*", &fa_slash, regexp->nocase);
579 exn = regexp_to_fa(regexp, &fa_key);
583 fa_isect = fa_intersect(fa_slash, fa_key);
584 if (! fa_is_basic(fa_isect, FA_EMPTY)) {
585 exn = make_exn_value(info,
586 "The key regexp /%s/ matches a '/'",
587 regexp->pattern->str);
593 fa_isect = fa_key = fa_slash = NULL;
594 } else if (tag == L_LABEL) {
595 if (strchr(string->str, SEP) != NULL) {
596 exn = make_exn_value(info,
597 "The label string \"%s\" contains a '/'",
601 } else if (tag == L_DEL && string != NULL) {
603 const char *dflt = string->str;
604 cnt = regexp_match(regexp, dflt, strlen(dflt), 0, NULL);
605 if (cnt != strlen(dflt)) {
606 char *s = escape(dflt, -1, RX_ESCAPES);
607 char *r = regexp_escape(regexp);
608 exn = make_exn_value(info,
609 "del: the default value '%s' does not match /%s/",
617 /* Build the actual lens */
618 lens = make_lens(tag, info);
619 lens->regexp = regexp;
620 lens->string = string;
621 lens->key = (tag == L_KEY || tag == L_LABEL || tag == L_SEQ);
622 lens->value = (tag == L_STORE || tag == L_VALUE);
623 lens->consumes_value = (tag == L_STORE || tag == L_VALUE);
624 lens->atype = regexp_make_empty(info);
626 if (tag == L_DEL || tag == L_STORE || tag == L_KEY) {
627 lens->ctype = ref(regexp);
628 lens->ctype_nullable = regexp_matches_empty(lens->ctype);
629 } else if (tag == L_LABEL || tag == L_VALUE
630 || tag == L_SEQ || tag == L_COUNTER) {
631 lens->ctype = regexp_make_empty(info);
632 lens->ctype_nullable = 1;
642 make_regexp_from_string(info, (struct string *) digits_pat);
643 if (lens->ktype == NULL)
645 } else if (tag == L_KEY) {
646 lens->ktype = restrict_regexp(lens->regexp);
647 } else if (tag == L_LABEL) {
648 lens->ktype = make_regexp_literal(info, lens->string->str);
649 if (lens->ktype == NULL)
654 if (tag == L_STORE) {
655 lens->vtype = restrict_regexp(lens->regexp);
656 } else if (tag == L_VALUE) {
657 lens->vtype = make_regexp_literal(info, lens->string->str);
660 return make_lens_value(lens);
669 * Typechecking of lenses
671 static struct value *disjoint_check(struct info *info, bool is_get,
672 struct regexp *r1, struct regexp *r2) {
673 struct fa *fa1 = NULL;
674 struct fa *fa2 = NULL;
675 struct fa *fa = NULL;
676 struct value *exn = NULL;
677 const char *const msg = is_get ? "union.get" : "tree union.put";
679 if (r1 == NULL || r2 == NULL)
682 exn = regexp_to_fa(r1, &fa1);
686 exn = regexp_to_fa(r2, &fa2);
690 fa = fa_intersect(fa1, fa2);
691 if (! fa_is_basic(fa, FA_EMPTY)) {
694 fa_example(fa, &xmpl, &xmpl_len);
696 char *fmt = enc_format(xmpl, xmpl_len);
702 exn = make_exn_value(ref(info),
703 "overlapping lenses in %s", msg);
706 exn_printf_line(exn, "Example matched by both: '%s'", xmpl);
708 exn_printf_line(exn, "Example matched by both: %s", xmpl);
720 static struct value *typecheck_union(struct info *info,
721 struct lens *l1, struct lens *l2) {
722 struct value *exn = NULL;
724 exn = disjoint_check(info, true, l1->ctype, l2->ctype);
726 exn = disjoint_check(info, false, l1->atype, l2->atype);
729 char *fi = format_info(l1->info);
730 exn_printf_line(exn, "First lens: %s", fi);
733 fi = format_info(l2->info);
734 exn_printf_line(exn, "Second lens: %s", fi);
740 static struct value *
741 ambig_check(struct info *info, struct fa *fa1, struct fa *fa2,
742 enum lens_type typ, struct lens *l1, struct lens *l2,
743 const char *msg, bool iterated) {
746 struct value *exn = NULL;
749 r = fa_ambig_example(fa1, fa2, &upv, &upv_len, &pv, &v);
751 exn = make_exn_value(ref(info), "not enough memory");
755 ERR_REPORT(info, AUG_ENOMEM, NULL);
756 return info->error->exn;
761 char *e_u, *e_up, *e_upv, *e_pv, *e_v;
765 e_u = enc_format(upv, pv - upv);
766 e_up = enc_format(upv, v - upv);
767 e_upv = enc_format(upv, upv_len);
768 e_pv = enc_format(pv, strlen(pv));
769 e_v = enc_format(v, strlen(v));
770 lns_format_atype(l1, &s1);
771 lns_format_atype(l2, &s2);
773 e_u = escape(upv, pv - upv, RX_ESCAPES);
774 e_up = escape(upv, v - upv, RX_ESCAPES);
775 e_upv = escape(upv, -1, RX_ESCAPES);
776 e_pv = escape(pv, -1, RX_ESCAPES);
777 e_v = escape(v, -1, RX_ESCAPES);
778 s1 = regexp_escape(ltype(l1, typ));
779 s2 = regexp_escape(ltype(l2, typ));
781 exn = make_exn_value(ref(info), "%s", msg);
783 exn_printf_line(exn, " Iterated regexp: /%s/", s1);
785 exn_printf_line(exn, " First regexp: /%s/", s1);
786 exn_printf_line(exn, " Second regexp: /%s/", s2);
788 exn_printf_line(exn, " '%s' can be split into", e_upv);
789 exn_printf_line(exn, " '%s|=|%s'\n", e_u, e_pv);
790 exn_printf_line(exn, " and");
791 exn_printf_line(exn, " '%s|=|%s'\n", e_up, e_v);
804 static struct value *
805 ambig_concat_check(struct info *info, const char *msg,
806 enum lens_type typ, struct lens *l1, struct lens *l2) {
807 struct fa *fa1 = NULL;
808 struct fa *fa2 = NULL;
809 struct value *result = NULL;
810 struct regexp *r1 = ltype(l1, typ);
811 struct regexp *r2 = ltype(l2, typ);
813 if (r1 == NULL || r2 == NULL)
816 result = regexp_to_fa(r1, &fa1);
820 result = regexp_to_fa(r2, &fa2);
824 result = ambig_check(info, fa1, fa2, typ, l1, l2, msg, false);
831 static struct value *typecheck_concat(struct info *info,
832 struct lens *l1, struct lens *l2) {
833 struct value *result = NULL;
835 result = ambig_concat_check(info, "ambiguous concatenation",
837 if (result == NULL) {
838 result = ambig_concat_check(info, "ambiguous tree concatenation",
841 if (result != NULL) {
842 char *fi = format_info(l1->info);
843 exn_printf_line(result, "First lens: %s", fi);
845 fi = format_info(l2->info);
846 exn_printf_line(result, "Second lens: %s", fi);
852 static struct value *make_exn_square(struct info *info, struct lens *l1,
853 struct lens *l2, const char *msg) {
856 struct value *exn = make_exn_value(ref(info), "%s",
857 "Inconsistency in lens square");
858 exn_printf_line(exn, "%s", msg);
859 fi = format_info(l1->info);
860 exn_printf_line(exn, "Left lens: %s", fi);
862 fi = format_info(l2->info);
863 exn_printf_line(exn, "Right lens: %s", fi);
868 static struct value *typecheck_square(struct info *info, struct lens *l1,
871 struct value *exn = NULL;
872 struct fa *fa1 = NULL, *fa2 = NULL;
873 struct regexp *r1 = ltype(l1, CTYPE);
874 struct regexp *r2 = ltype(l2, CTYPE);
876 if (r1 == NULL || r2 == NULL)
879 exn = regexp_to_fa(r1, &fa1);
883 exn = regexp_to_fa(r2, &fa2);
887 r = fa_equals(fa1, fa2);
890 exn = make_exn_value(ref(info), "not enough memory");
894 ERR_REPORT(info, AUG_ENOMEM, NULL);
895 return info->error->exn;;
900 exn = make_exn_square(info, l1, l2,
901 "Left and right lenses must accept the same language");
905 /* check del create consistency */
906 if (l1->tag == L_DEL && l2->tag == L_DEL) {
907 if (!STREQ(l1->string->str, l2->string->str)) {
908 exn = make_exn_square(info, l1, l2,
909 "Left and right lenses must have the same default value");
920 static struct value *
921 ambig_iter_check(struct info *info, const char *msg,
922 enum lens_type typ, struct lens *l) {
923 struct fa *fas = NULL, *fa = NULL;
924 struct value *result = NULL;
925 struct regexp *r = ltype(l, typ);
930 result = regexp_to_fa(r, &fa);
934 fas = fa_iter(fa, 0, -1);
936 result = ambig_check(info, fa, fas, typ, l, l, msg, true);
944 static struct value *typecheck_iter(struct info *info, struct lens *l) {
945 struct value *result = NULL;
947 result = ambig_iter_check(info, "ambiguous iteration", CTYPE, l);
948 if (result == NULL) {
949 result = ambig_iter_check(info, "ambiguous tree iteration", ATYPE, l);
951 if (result != NULL) {
952 char *fi = format_info(l->info);
953 exn_printf_line(result, "Iterated lens: %s", fi);
959 static struct value *typecheck_maybe(struct info *info, struct lens *l) {
960 /* Check (r)? as (<e>|r) where <e> is the empty language */
961 struct value *exn = NULL;
963 if (l->ctype != NULL && regexp_matches_empty(l->ctype)) {
964 exn = make_exn_value(ref(info),
965 "illegal optional expression: /%s/ matches the empty word",
966 l->ctype->pattern->str);
969 /* Typecheck the put direction; the check passes if
970 (1) the atype does not match the empty string, because we can tell
971 from looking at tree nodes whether L should be applied or not
972 (2) L handles a value; with that, we know whether to apply L or not
973 depending on whether the current node has a non NULL value or not
975 if (exn == NULL && ! l->consumes_value) {
976 if (l->atype != NULL && regexp_matches_empty(l->atype)) {
977 exn = make_exn_value(ref(info),
978 "optional expression matches the empty tree but does not consume a value");
984 void free_lens(struct lens *lens) {
987 ensure(lens->ref == 0, lens->info);
989 if (debugging("lenses"))
990 dump_lens_tree(lens);
993 unref(lens->regexp, regexp);
994 unref(lens->string, string);
998 unref(lens->regexp, regexp);
1004 unref(lens->string, string);
1010 unref(lens->child, lens);
1014 for (int i=0; i < lens->nchildren; i++)
1015 unref(lens->children[i], lens);
1016 free(lens->children);
1019 if (!lens->rec_internal) {
1020 unref(lens->body, lens);
1028 for (int t=0; t < ntypes; t++)
1029 unref(ltype(lens, t), regexp);
1031 unref(lens->info, info);
1032 jmt_free(lens->jmt);
1038 void lens_release(struct lens *lens) {
1042 for (int t=0; t < ntypes; t++)
1043 regexp_release(ltype(lens, t));
1045 if (lens->tag == L_KEY || lens->tag == L_STORE)
1046 regexp_release(lens->regexp);
1048 if (lens->tag == L_SUBTREE || lens->tag == L_STAR
1049 || lens->tag == L_MAYBE || lens->tag == L_SQUARE) {
1050 lens_release(lens->child);
1053 if (lens->tag == L_UNION || lens->tag == L_CONCAT) {
1054 for (int i=0; i < lens->nchildren; i++) {
1055 lens_release(lens->children[i]);
1059 if (lens->tag == L_REC && !lens->rec_internal) {
1060 lens_release(lens->body);
1063 jmt_free(lens->jmt);
1068 * Encoding of tree levels
1070 char *enc_format(const char *e, size_t len) {
1071 return enc_format_indent(e, len, 0);
1074 char *enc_format_indent(const char *e, size_t len, int indent) {
1076 char *result = NULL, *r;
1079 while (*k && k - e < len) {
1080 char *eq, *slash, *v;
1081 eq = strchr(k, ENC_EQ_CH);
1083 slash = strchr(eq, ENC_SLASH_CH);
1084 assert(slash != NULL);
1089 size += 6; /* Surrounding braces */
1091 size += 1 + (eq - k) + 1;
1093 size += 4 + (slash - v) + 1;
1096 if (ALLOC_N(result, size + 1) < 0)
1101 while (*k && k - e < len) {
1102 char *eq, *slash, *v;
1103 eq = strchr(k, ENC_EQ_CH);
1104 slash = strchr(eq, ENC_SLASH_CH);
1105 assert(eq != NULL && slash != NULL);
1108 for (int i=0; i < indent; i++)
1110 r = stpcpy(r, " { ");
1112 r = stpcpy(r, "\"");
1113 r = stpncpy(r, k, eq - k);
1114 r = stpcpy(r, "\"");
1117 r = stpcpy (r, " = \"");
1118 r = stpncpy(r, v, slash - v);
1119 r = stpcpy(r, "\"");
1121 r = stpcpy(r, " }");
1129 static int format_atype(struct lens *l, char **buf, uint indent);
1131 static int format_indent(char **buf, uint indent) {
1132 if (ALLOC_N(*buf, indent+1) < 0)
1134 memset(*buf, ' ', indent);
1138 static int format_subtree_atype(struct lens *l, char **buf, uint indent) {
1139 char *k = NULL, *v = NULL;
1140 const struct regexp *ktype = l->child->ktype;
1141 const struct regexp *vtype = l->child->vtype;
1145 if (format_indent(&si, indent) < 0)
1148 if (ktype != NULL) {
1149 k = regexp_escape(ktype);
1153 if (vtype != NULL) {
1154 v = regexp_escape(vtype);
1158 r = xasprintf(buf, "%s{ = /%s/ }", si, k, v);
1160 r = xasprintf(buf, "%s{ /%s/ = /%s/ }", si, k, v);
1163 r = xasprintf(buf, "%s{ }", si, k);
1165 r = xasprintf(buf, "%s{ /%s/ }", si, k);
1178 static int format_rep_atype(struct lens *l, char **buf,
1179 uint indent, char quant) {
1183 r = format_atype(l->child, &a, indent);
1186 if (strlen(a) == 0) {
1193 if (l->child->tag == L_CONCAT || l->child->tag == L_UNION)
1194 r = xasprintf(buf, "(%s)%c", a, quant);
1196 r = xasprintf(buf, "%s%c", a, quant);
1207 static int format_concat_atype(struct lens *l, char **buf, uint indent) {
1208 char **c = NULL, *s = NULL, *p;
1210 size_t len = 0, nconc = 0;
1212 if (ALLOC_N(c, l->nchildren) < 0)
1215 for (int i=0; i < l->nchildren; i++) {
1216 r = format_atype(l->children[i], c+i, indent);
1219 len += strlen(c[i]) + 3;
1220 if (strlen(c[i]) > 0)
1222 if (l->children[i]->tag == L_UNION)
1226 if (ALLOC_N(s, len+1) < 0)
1229 for (int i=0; i < l->nchildren; i++) {
1230 bool needs_parens = nconc > 1 && l->children[i]->tag == L_UNION;
1231 if (strlen(c[i]) == 0)
1237 for (int j=0; j < indent; j++)
1251 for (int i=0; i < l->nchildren; i++)
1258 static int format_union_atype(struct lens *l, char **buf, uint indent) {
1259 char **c = NULL, *s = NULL, *p;
1263 if (ALLOC_N(c, l->nchildren) < 0)
1266 /* Estimate the length of the string we will build. The calculation
1267 overestimates that length so that the logic is a little simpler than
1268 in the loop where we actually build the string */
1269 for (int i=0; i < l->nchildren; i++) {
1270 r = format_atype(l->children[i], c+i, indent + 2);
1273 /* We will add c[i] and some fixed characters */
1274 len += strlen(c[i]) + strlen("\n| ()");
1275 if (strlen(c[i]) < indent+2) {
1276 /* We will add indent+2 whitespace */
1281 if (ALLOC_N(s, len+1) < 0)
1285 for (int i=0; i < l->nchildren; i++) {
1289 if (strlen(t) >= indent+2) {
1290 /* c[i] is not just whitespace */
1291 p = stpncpy(p, t, indent+2);
1294 /* c[i] is just whitespace, make sure we indent the
1295 '|' appropriately */
1296 memset(p, ' ', indent+2);
1299 p = stpcpy(p, "| ");
1301 /* Skip additional indent */
1305 p = stpcpy(p, "()");
1314 for (int i=0; i < l->nchildren; i++)
1321 static int format_rec_atype(struct lens *l, char **buf, uint indent) {
1324 if (l->rec_internal) {
1325 *buf = strdup("<<rec>>");
1326 return (*buf == NULL) ? -1 : 0;
1330 r = format_atype(l->body, &c, indent);
1333 r = xasprintf(buf, "<<rec:%s>>", c);
1335 return (r < 0) ? -1 : 0;
1338 static int format_atype(struct lens *l, char **buf, uint indent) {
1350 return (*buf == NULL) ? -1 : 0;
1353 return format_subtree_atype(l, buf, indent);
1356 return format_rep_atype(l, buf, indent, '*');
1359 return format_rep_atype(l, buf, indent, '?');
1362 return format_concat_atype(l, buf, indent);
1365 return format_union_atype(l, buf, indent);
1368 return format_rec_atype(l, buf, indent);
1371 return format_concat_atype(l->child, buf, indent);
1380 int lns_format_atype(struct lens *l, char **buf) {
1382 r = format_atype(l, buf, 4);
1389 struct value *lns_make_rec(struct info *info) {
1390 struct lens *l = make_lens(L_REC, info);
1392 l->rec_internal = 1;
1394 return make_lens_value(l);
1397 /* Transform a recursive lens into a recursive transition network
1399 * First, we transform the lens into context free grammar, considering any
1400 * nonrecursive lens as a terminal
1402 * cfg: lens -> nonterminal -> production list
1404 * cfg(primitive, N) -> N := regexp(primitive)
1405 * cfg(l1 . l2, N) -> N := N1 . N2 + cfg(l1, N1) + cfg(l2, N2)
1406 * cfg(l1 | l2, N) -> N := N1 | N2 + cfg(l1, N1) + cfg(l2, N2)
1407 * cfg(l*, N) -> N := N . N' | eps + cfg(l, N')
1408 * cfg([ l ], N) -> N := N' + cfg(l, N')
1410 * We use the lenses as nonterminals themselves; this also means that our
1411 * productions are normalized such that the RHS is either a terminal
1412 * (regexp) or entirely consists of nonterminals
1414 * In a few places, we need to know that a nonterminal corresponds to a
1415 * subtree combinator ([ l ]); this is the main reason that the rule (cfg[
1416 * l ], N) introduces a useless production N := N'.
1418 * Computing the types for a recursive lens r is (fairly) straightforward,
1419 * given the above grammar, which we convert to an automaton following
1420 * http://arxiv.org/abs/cs/9910022; the only complication arises from the
1421 * subtree combinator, since it can be used in recursive lenses to
1422 * construct trees of arbitrary depth, but we need to approximate the types
1423 * of r in a way that fits with our top-down tree automaton in put.c.
1425 * To handle subtree combinators, remember that the type rules for a lens
1431 * m.atype = enc(l.ktype, l.vtype)
1432 * ( enc is a function regexp -> regexp -> regexp)
1434 * We compute types for r by modifying its automaton according to
1435 * Nederhof's paper and reducing it to a regular expression of lenses. This
1436 * has to happen in the following steps:
1437 * r.ktype : approximate by using [ .. ].ktype = NULL
1438 * r.vtype : same as r.ktype
1439 * r.ctype : approximate by treating [ l ] as l
1440 * r.atype : approximate by using r.ktype and r.vtype from above
1441 * in lens expressions [ f(r) ]
1444 /* Transitions go to a state and are labeled with a lens. For epsilon
1445 * transitions, lens may be NULL. When lens is a simple (nonrecursive
1446 * lens), PROD will be NULL. When we modify the automaton to splice
1447 * nonterminals in, we remember the production for the nonterminal in PROD.
1456 struct state *next; /* Linked list for memory management */
1458 struct trans *trans;
1461 /* Productions for lens LENS. Start state START and end state END. If we
1462 start with START, END is the only accepting state. */
1465 struct state *start;
1469 /* A recursive transition network used to compute regular approximations
1475 struct state *states; /* Linked list through next of all states in all
1476 prods; the states for each production are on
1477 the part of the list from prod->start to
1480 enum lens_type lens_type;
1481 unsigned int check : 1;
1484 #define RTN_BAIL(rtn) if ((rtn)->exn != NULL || \
1485 (rtn)->info->error->code != AUG_NOERROR) \
1488 static void free_prod(struct prod *prod) {
1491 unref(prod->lens, lens);
1495 static void free_rtn(struct rtn *rtn) {
1498 for (int i=0; i < rtn->nprod; i++)
1499 free_prod(rtn->prod[i]);
1501 list_for_each(s, rtn->states) {
1502 for (int i=0; i < s->ntrans; i++) {
1503 unref(s->trans[i].lens, lens);
1504 unref(s->trans[i].re, regexp);
1508 list_free(rtn->states);
1509 unref(rtn->info, info);
1510 unref(rtn->exn, value);
1514 static struct state *add_state(struct prod *prod) {
1515 struct state *result = NULL;
1519 ERR_NOMEM(r < 0, prod->lens->info);
1521 list_cons(prod->start->next, result);
1526 static struct trans *add_trans(struct rtn *rtn, struct state *state,
1527 struct state *to, struct lens *l) {
1529 struct trans *result = NULL;
1531 for (int i=0; i < state->ntrans; i++)
1532 if (state->trans[i].to == to && state->trans[i].lens == l)
1533 return state->trans + i;
1535 r = REALLOC_N(state->trans, state->ntrans+1);
1536 ERR_NOMEM(r < 0, rtn->info);
1538 result = state->trans + state->ntrans;
1544 result->lens = ref(l);
1545 result->re = ref(ltype(l, rtn->lens_type));
1551 static struct prod *make_prod(struct rtn *rtn, struct lens *l) {
1552 struct prod *result = NULL;
1556 ERR_NOMEM(r < 0, l->info);
1558 result->lens = ref(l);
1559 r = ALLOC(result->start);
1560 ERR_NOMEM(r < 0, l->info);
1562 result->end = add_state(result);
1565 result->end->next = rtn->states;
1566 rtn->states = result->start;
1574 static struct prod *prod_for_lens(struct rtn *rtn, struct lens *l) {
1577 for (int i=0; i < rtn->nprod; i++) {
1578 if (rtn->prod[i]->lens == l)
1579 return rtn->prod[i];
1584 static void rtn_dot(struct rtn *rtn, const char *stage) {
1588 fp = debug_fopen("rtn_%s_%s.dot", stage, lens_type_names[rtn->lens_type]);
1592 fprintf(fp, "digraph \"l1\" {\n rankdir=LR;\n");
1593 list_for_each(s, rtn->states) {
1595 for (int p=0; p < rtn->nprod; p++) {
1596 if (s == rtn->prod[p]->start) {
1597 r = xasprintf(&label, "s%d", p);
1598 } else if (s == rtn->prod[p]->end) {
1599 r = xasprintf(&label, "e%d", p);
1601 ERR_NOMEM(r < 0, rtn->info);
1603 if (label == NULL) {
1604 r = xasprintf(&label, "%p", s);
1605 ERR_NOMEM(r < 0, rtn->info);
1607 fprintf(fp, " n%p [label = \"%s\"];\n", s, label == NULL ? "" : label);
1609 for (int i=0; i < s->ntrans; i++) {
1610 fprintf(fp, " n%p -> n%p", s, s->trans[i].to);
1611 if (s->trans[i].re != NULL) {
1612 label = regexp_escape(s->trans[i].re);
1613 for (char *t = label; *t; t++)
1616 fprintf(fp, " [ label = \"%s\" ]", label);
1627 /* Add transitions to RTN corresponding to cfg(l, N) */
1628 static void rtn_rules(struct rtn *rtn, struct lens *l) {
1632 struct prod *prod = prod_for_lens(rtn, l);
1636 int r = REALLOC_N(rtn->prod, rtn->nprod+1);
1637 ERR_NOMEM(r < 0, l->info);
1639 prod = make_prod(rtn, l);
1640 rtn->prod[rtn->nprod] = prod;
1644 struct state *start = prod->start;
1648 /* cfg(l1|..|ln, N) -> N := N1 | N2 | ... | Nn */
1649 for (int i=0; i < l->nchildren; i++) {
1650 add_trans(rtn, start, prod->end, l->children[i]);
1652 rtn_rules(rtn, l->children[i]);
1657 /* cfg(l1 . l2 ... ln, N) -> N := N1 . N2 ... Nn */
1658 for (int i=0; i < l->nchildren-1; i++) {
1659 struct state *s = add_state(prod);
1661 add_trans(rtn, start, s, l->children[i]);
1664 rtn_rules(rtn, l->children[i]);
1668 struct lens *c = l->children[l->nchildren - 1];
1669 add_trans(rtn, start, prod->end, c);
1676 /* cfg(l*, N) -> N := N . N' | eps */
1677 struct state *s = add_state(prod);
1679 add_trans(rtn, start, s, l);
1681 add_trans(rtn, s, prod->end, l->child);
1683 add_trans(rtn, start, prod->end, NULL);
1685 rtn_rules(rtn, l->child);
1690 switch (rtn->lens_type) {
1693 /* cfg([ l ], N) -> N := eps */
1694 add_trans(rtn, start, prod->end, NULL);
1697 /* cfg([ l ], N) -> N := N' plus cfg(l, N') */
1698 add_trans(rtn, start, prod->end, l->child);
1700 rtn_rules(rtn, l->child);
1704 /* At this point, we have propagated ktype and vtype */
1705 /* cfg([ l ], N) -> N := enc(l->ktype, l->vtype) */
1706 struct trans *t = add_trans(rtn, start, prod->end, NULL);
1708 t->re = subtree_atype(l->info, l->child->ktype, l->child->vtype);
1712 BUG_ON(true, rtn->info, "Unexpected lens type %d", rtn->lens_type);
1717 /* cfg(l?, N) -> N := N' | eps plus cfg(l, N') */
1718 add_trans(rtn, start, prod->end, l->child);
1720 add_trans(rtn, start, prod->end, NULL);
1722 rtn_rules(rtn, l->child);
1726 /* cfg(l, N) -> N := N' plus cfg(l->body, N') */
1727 add_trans(rtn, start, prod->end, l->body);
1729 rtn_rules(rtn, l->body);
1733 add_trans(rtn, start, prod->end, l->child);
1744 /* Replace transition t with two epsilon transitions s => p->start and
1745 * p->end => s->trans[i].to where s is the start of t. Instead of adding
1746 * epsilon transitions, we expand the epsilon transitions.
1748 static void prod_splice(struct rtn *rtn,
1749 struct prod *from, struct prod *to, struct trans *t) {
1751 add_trans(rtn, to->end, t->to, NULL);
1752 ERR_BAIL(from->lens->info);
1754 unref(t->re, regexp);
1760 static void rtn_splice(struct rtn *rtn, struct prod *prod) {
1761 for (struct state *s = prod->start; s != prod->end; s = s->next) {
1762 for (int i=0; i < s->ntrans; i++) {
1763 struct prod *p = prod_for_lens(rtn, s->trans[i].lens);
1765 prod_splice(rtn, prod, p, s->trans+i);
1774 static struct rtn *rtn_build(struct lens *rec, enum lens_type lt) {
1779 ERR_NOMEM(r < 0, rec->info);
1781 rtn->info = ref(rec->info);
1782 rtn->lens_type = lt;
1784 rtn_rules(rtn, rec);
1786 if (debugging("cf.approx"))
1787 rtn_dot(rtn, "10-rules");
1789 for (int i=0; i < rtn->nprod; i++) {
1790 rtn_splice(rtn, rtn->prod[i]);
1793 if (debugging("cf.approx"))
1794 rtn_dot(rtn, "11-splice");
1800 /* Compare transitions lexicographically by (to, lens) */
1801 static int trans_to_cmp(const void *v1, const void *v2) {
1802 const struct trans *t1 = v1;
1803 const struct trans *t2 = v2;
1805 if (t1->to != t2->to)
1806 return (t1->to < t2->to) ? -1 : 1;
1808 if (t1->lens == t2->lens)
1810 return (t1->lens < t2->lens) ? -1 : 1;
1813 /* Collapse a transition S1 -> S -> S2 by adding a transition S1 -> S2 with
1814 * lens R1 . (LOOP)* . R2 | R3 where R3 is the regexp on the possibly
1815 * existing transition S1 -> S2. If LOOP is NULL or R3 does not exist,
1816 * label the transition with a simplified regexp by treating NULL as
1818 static void collapse_trans(struct rtn *rtn,
1819 struct state *s1, struct state *s2,
1820 struct regexp *r1, struct regexp *loop,
1821 struct regexp *r2) {
1823 struct trans *t = NULL;
1824 struct regexp *r = NULL;
1826 for (int i=0; i < s1->ntrans; i++) {
1827 if (s1->trans[i].to == s2) {
1833 /* Set R = R1 . (LOOP)* . R2, treating NULL's as epsilon */
1837 else if (r2 == NULL)
1840 r = regexp_concat(rtn->info, r1, r2);
1842 struct regexp *s = regexp_iter(rtn->info, loop, 0, -1);
1843 ERR_NOMEM(s == NULL, rtn->info);
1844 struct regexp *c = NULL;
1849 c = regexp_concat(rtn->info, r1, s);
1851 ERR_NOMEM(c == NULL, rtn->info);
1857 r = regexp_concat(rtn->info, c, r2);
1859 ERR_NOMEM(r == NULL, rtn->info);
1864 t = add_trans(rtn, s1, s2, NULL);
1865 ERR_NOMEM(t == NULL, rtn->info);
1867 } else if (t->re == NULL) {
1868 if (r == NULL || regexp_matches_empty(r))
1871 t->re = regexp_maybe(rtn->info, r);
1873 ERR_NOMEM(t->re == NULL, rtn->info);
1875 } else if (r == NULL) {
1876 if (!regexp_matches_empty(t->re)) {
1877 r = regexp_maybe(rtn->info, t->re);
1878 unref(t->re, regexp);
1880 ERR_NOMEM(r == NULL, rtn->info);
1883 struct regexp *u = regexp_union(rtn->info, r, t->re);
1885 unref(t->re, regexp);
1887 ERR_NOMEM(u == NULL, rtn->info);
1892 rtn->exn = rtn->info->error->exn;
1896 /* Reduce the automaton with start state rprod->start and only accepting
1897 * state rprod->end so that we have a single transition rprod->start =>
1898 * rprod->end labelled with the overall approximating regexp for the
1901 * This is the same algorithm as fa_as_regexp in fa.c
1903 static struct regexp *rtn_reduce(struct rtn *rtn, struct lens *rec) {
1904 struct prod *prod = prod_for_lens(rtn, rec);
1907 ERR_THROW(prod == NULL, rtn->info, AUG_EINTERNAL,
1908 "No production for recursive lens");
1910 /* Eliminate epsilon transitions and turn transitions between the same
1911 * two states into a regexp union */
1912 list_for_each(s, rtn->states) {
1913 qsort(s->trans, s->ntrans, sizeof(*s->trans), trans_to_cmp);
1914 for (int i=0; i < s->ntrans; i++) {
1916 for (;j < s->ntrans && s->trans[i].to == s->trans[j].to;
1919 struct regexp *u, **v;
1920 r = ALLOC_N(v, j - i);
1921 ERR_NOMEM(r < 0, rtn->info);
1922 for (int k=i; k < j; k++)
1923 v[k-i] = s->trans[k].re;
1924 u = regexp_union_n(rtn->info, j - i, v);
1926 // FIXME: The calling convention for regexp_union_n
1927 // is bad, since we can't distinguish between alloc
1928 // failure and unioning all NULL's
1929 for (int k=0; k < j-i; k++)
1932 ERR_NOMEM(true, rtn->info);
1936 for (int k=i; k < j; k++) {
1937 unref(s->trans[k].lens, lens);
1938 unref(s->trans[k].re, regexp);
1941 MEMMOVE(s->trans + (i+1),
1944 s->ntrans -= j - (i + 1);
1949 /* Introduce new start and end states with epsilon transitions to/from
1950 * the old start and end states */
1951 struct state *end = NULL;
1952 struct state *start = NULL;
1953 if (ALLOC(start) < 0 || ALLOC(end) < 0) {
1956 ERR_NOMEM(true, rtn->info);
1958 list_insert_before(start, prod->start, rtn->states);
1959 end->next = prod->end->next;
1960 prod->end->next = end;
1962 add_trans(rtn, start, prod->start, NULL);
1964 add_trans(rtn, prod->end, end, NULL);
1967 prod->start = start;
1970 /* Eliminate states S (except for INI and FIN) one by one:
1971 * Let LOOP the regexp for the transition S -> S if it exists, epsilon
1973 * For all S1, S2 different from S with S1 -> S -> S2
1974 * Let R1 the regexp of S1 -> S
1975 * R2 the regexp of S -> S2
1976 * R3 the regexp of S1 -> S2 (or the regexp matching nothing
1977 * if no such transition)
1978 * set the regexp on the transition S1 -> S2 to
1979 * R1 . (LOOP)* . R2 | R3 */
1980 // FIXME: This does not go over all states
1981 list_for_each(s, rtn->states) {
1982 if (s == prod->end || s == prod->start)
1984 struct regexp *loop = NULL;
1985 for (int i=0; i < s->ntrans; i++) {
1986 if (s == s->trans[i].to) {
1987 ensure(loop == NULL, rtn->info);
1988 loop = s->trans[i].re;
1991 list_for_each(s1, rtn->states) {
1994 for (int t1=0; t1 < s1->ntrans; t1++) {
1995 if (s == s1->trans[t1].to) {
1996 for (int t2=0; t2 < s->ntrans; t2++) {
1997 struct state *s2 = s->trans[t2].to;
2000 collapse_trans(rtn, s1, s2,
2001 s1->trans[t1].re, loop,
2010 /* Find the overall regexp */
2011 struct regexp *result = NULL;
2012 for (int i=0; i < prod->start->ntrans; i++) {
2013 if (prod->start->trans[i].to == prod->end) {
2014 ensure(result == NULL, rtn->info);
2015 result = ref(prod->start->trans[i].re);
2023 static void propagate_type(struct lens *l, enum lens_type lt) {
2024 struct regexp **types = NULL;
2027 if (! l->recursive || ltype(l, lt) != NULL)
2032 r = ALLOC_N(types, l->nchildren);
2033 ERR_NOMEM(r < 0, l->info);
2034 for (int i=0; i < l->nchildren; i++) {
2035 propagate_type(l->children[i], lt);
2036 types[i] = ltype(l->children[i], lt);
2038 ltype(l, lt) = regexp_concat_n(l->info, l->nchildren, types);
2042 r = ALLOC_N(types, l->nchildren);
2043 ERR_NOMEM(r < 0, l->info);
2044 for (int i=0; i < l->nchildren; i++) {
2045 propagate_type(l->children[i], lt);
2046 types[i] = ltype(l->children[i], lt);
2048 ltype(l, lt) = regexp_union_n(l->info, l->nchildren, types);
2052 propagate_type(l->child, lt);
2054 l->atype = subtree_atype(l->info, l->child->ktype, l->child->vtype);
2056 l->ctype = ref(l->child->ctype);
2059 propagate_type(l->child, lt);
2060 ltype(l, lt) = regexp_iter(l->info, ltype(l->child, lt), 0, -1);
2063 propagate_type(l->child, lt);
2064 ltype(l, lt) = regexp_maybe(l->info, ltype(l->child, lt));
2070 propagate_type(l->child, lt);
2071 ltype(l, lt) = ref(ltype(l->child, lt));
2082 static struct value *typecheck(struct lens *l, int check);
2084 typedef struct value *typecheck_n_make(struct info *,
2085 struct lens *, struct lens *, int);
2087 static struct info *merge_info(struct info *i1, struct info *i2) {
2090 ERR_NOMEM(info == NULL, i1);
2092 info->filename = ref(i1->filename);
2093 info->first_line = i1->first_line;
2094 info->first_column = i1->first_column;
2095 info->last_line = i2->last_line;
2096 info->last_column = i2->last_column;
2097 info->error = i1->error;
2105 static struct value *typecheck_n(struct lens *l,
2106 typecheck_n_make *make, int check) {
2107 struct value *exn = NULL;
2108 struct lens *acc = NULL;
2110 ensure(l->tag == L_CONCAT || l->tag == L_UNION, l->info);
2111 for (int i=0; i < l->nchildren; i++) {
2112 exn = typecheck(l->children[i], check);
2116 acc = ref(l->children[0]);
2117 for (int i=1; i < l->nchildren; i++) {
2118 struct info *info = merge_info(acc->info, l->children[i]->info);
2119 ERR_NOMEM(info == NULL, acc->info);
2120 exn = (*make)(info, acc, ref(l->children[i]), check);
2123 ensure(exn->tag == V_LENS, l->info);
2124 acc = ref(exn->lens);
2127 l->value = acc->value;
2134 static struct value *typecheck(struct lens *l, int check) {
2135 struct value *exn = NULL;
2137 /* Nonrecursive lenses are typechecked at build time */
2143 exn = typecheck_n(l, lns_make_concat, check);
2146 exn = typecheck_n(l, lns_make_union, check);
2150 exn = typecheck(l->child, check);
2154 exn = typecheck_iter(l->info, l->child);
2155 if (exn == NULL && l->value)
2156 exn = make_exn_value(l->info, "Multiple stores in iteration");
2157 if (exn == NULL && l->key)
2158 exn = make_exn_value(l->info, "Multiple keys/labels in iteration");
2162 exn = typecheck_maybe(l->info, l->child);
2163 l->key = l->child->key;
2164 l->value = l->child->value;
2177 static struct value *rtn_approx(struct lens *rec, enum lens_type lt) {
2178 struct rtn *rtn = NULL;
2179 struct value *result = NULL;
2181 rtn = rtn_build(rec, lt);
2183 ltype(rec, lt) = rtn_reduce(rtn, rec);
2185 if (debugging("cf.approx"))
2186 rtn_dot(rtn, "50-reduce");
2188 propagate_type(rec->body, lt);
2189 ERR_BAIL(rec->info);
2194 if (debugging("cf.approx")) {
2195 printf("approx %s => ", lens_type_names[lt]);
2196 print_regexp(stdout, ltype(rec, lt));
2202 if (rtn->exn == NULL)
2203 result = rec->info->error->exn;
2205 result = ref(rtn->exn);
2209 static struct value *
2210 exn_multiple_epsilons(struct lens *lens,
2211 struct lens *l1, struct lens *l2) {
2213 struct value *exn = NULL;
2215 exn = make_exn_value(ref(lens->info),
2216 "more than one nullable branch in a union");
2217 fi = format_info(l1->info);
2218 exn_printf_line(exn, "First nullable lens: %s", fi);
2221 fi = format_info(l2->info);
2222 exn_printf_line(exn, "Second nullable lens: %s", fi);
2228 /* Update lens->ctype_nullable and return 1 if there was a change,
2229 * 0 if there was none */
2230 static int ctype_nullable(struct lens *lens, struct value **exn) {
2233 struct lens *null_lens = NULL;
2235 if (! lens->recursive)
2241 for (int i=0; i < lens->nchildren; i++) {
2242 if (ctype_nullable(lens->children[i], exn))
2244 if (! lens->children[i]->ctype_nullable)
2249 for (int i=0; i < lens->nchildren; i++) {
2250 if (ctype_nullable(lens->children[i], exn))
2252 if (lens->children[i]->ctype_nullable) {
2254 *exn = exn_multiple_epsilons(lens, null_lens,
2259 null_lens = lens->children[i];
2265 ret = ctype_nullable(lens->child, exn);
2266 nullable = lens->child->ctype_nullable;
2273 nullable = lens->body->ctype_nullable;
2281 if (nullable != lens->ctype_nullable) {
2283 lens->ctype_nullable = nullable;
2288 struct value *lns_check_rec(struct info *info,
2289 struct lens *body, struct lens *rec,
2291 /* The types in the order of approximation */
2292 static const enum lens_type types[] = { KTYPE, VTYPE, ATYPE };
2293 struct value *result = NULL;
2295 ensure(rec->tag == L_REC, info);
2296 ensure(rec->rec_internal, info);
2298 /* The user might have written down a regular lens with 'let rec' */
2299 if (! body->recursive) {
2300 result = make_lens_value(ref(body));
2301 ERR_NOMEM(result == NULL, info);
2305 /* To help memory management, we avoid the cycle inherent ina recursive
2306 * lens by using two instances of an L_REC lens. One is marked with
2307 * rec_internal, and used inside the body of the lens. The other is the
2308 * "toplevel" which receives external references.
2310 * The internal instance of the recursive lens is REC, the external one
2311 * is TOP, constructed below
2313 rec->body = body; /* REC does not own BODY */
2315 for (int i=0; i < ARRAY_CARDINALITY(types); i++) {
2316 result = rtn_approx(rec, types[i]);
2320 if (rec->atype == NULL) {
2321 result = make_exn_value(ref(rec->info),
2322 "recursive lens generates the empty language for its %s",
2323 rec->ctype == NULL ? "ctype" : "atype");
2327 rec->key = rec->body->key;
2328 rec->value = rec->body->value;
2329 rec->consumes_value = rec->body->consumes_value;
2331 while(ctype_nullable(rec->body, &result));
2334 rec->ctype_nullable = rec->body->ctype_nullable;
2336 result = typecheck(rec->body, check);
2340 result = lns_make_rec(ref(rec->info));
2341 struct lens *top = result->lens;
2342 for (int t=0; t < ntypes; t++)
2343 ltype(top, t) = ref(ltype(rec, t));
2344 top->value = rec->value;
2345 top->key = rec->key;
2346 top->consumes_value = rec->consumes_value;
2347 top->ctype_nullable = rec->ctype_nullable;
2348 top->body = ref(body);
2350 top->rec_internal = 0;
2353 top->jmt = jmt_build(top);
2358 if (result != NULL && result->tag != V_EXN)
2359 unref(result, value);
2361 result = info->error->exn;
2366 void dump_lens_tree(struct lens *lens){
2367 static int count = 0;
2370 fp = debug_fopen("lens_%02d_%s.dot", count++, ltag(lens));
2374 fprintf(fp, "digraph \"%s\" {\n", "lens");
2375 dump_lens(fp, lens);
2381 void dump_lens(FILE *out, struct lens *lens){
2385 fprintf(out, "\"%p\" [ shape = box, label = \"%s\\n", lens, ltag(lens));
2387 for (int t=0; t < ntypes; t++) {
2388 re = ltype(lens, t);
2391 fprintf(out, "%s=",lens_type_names[t]);
2392 print_regexp(out, re);
2393 fprintf(out, "\\n");
2396 fprintf(out, "recursive=%x\\n", lens->recursive);
2397 fprintf(out, "rec_internal=%x\\n", lens->rec_internal);
2398 fprintf(out, "consumes_value=%x\\n", lens->consumes_value);
2399 fprintf(out, "ctype_nullable=%x\\n", lens->ctype_nullable);
2400 fprintf(out, "\"];\n");
2417 for(i = 0; i<lens->nchildren;i++){
2418 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->children[i]);
2419 dump_lens(out, lens->children[i]);
2423 for(i = 0; i<lens->nchildren;i++){
2424 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->children[i]);
2425 dump_lens(out, lens->children[i]);
2429 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2430 dump_lens(out, lens->child);
2433 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2434 dump_lens(out, lens->child);
2438 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2439 dump_lens(out, lens->child);
2443 if (lens->rec_internal == 0){
2444 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2445 dump_lens(out, lens->body);
2449 fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2450 dump_lens(out, lens->child);
2453 fprintf(out, "ERROR\n");
2461 * indent-tabs-mode: nil