/*
* fa.c: finite automata
*
- * Copyright (C) 2007-2011 David Lutterkort
+ * Copyright (C) 2007-2016 David Lutterkort
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
unsigned int accept : 1;
unsigned int live : 1;
unsigned int reachable : 1;
+ unsigned int visited : 1; /* Used in various places to track progress */
/* Array of transitions. The TUSED first entries are used, the array
has allocated room for TSIZE */
size_t tused;
struct state_set *accept = state_set_init(-1, S_NONE);
int r;
+ E(accept == NULL);
+
r = mark_reachable(fa);
E(r < 0);
}
return accept;
error:
+ state_set_free(accept);
return NULL;
}
int r;
all = fa_states(fa);
+ E(all == NULL);
accept = fa_accept_states(fa);
+ E(accept == NULL);
F(state_set_init_data(all));
E(points == NULL);
if (make_ini) {
ini = state_set_init(-1, S_NONE);
- if (ini == NULL || state_set_push(ini, fa->initial) < 0)
+ if (ini == NULL || state_set_push(ini, fa->initial) < 0) {
+ state_set_free(ini);
goto error;
+ }
}
F(state_set_list_add(&worklist, ini));
int *nsnum = NULL;
int *nsind = NULL;
int result = -1;
+ unsigned int nstates = 0;
+ int nsigma = 0;
F(determinize(fa, NULL));
list_for_each(s, fa->initial) {
F(state_set_push(states, s));
}
- unsigned int nstates = states->used;
+ nstates = states->used;
- int nsigma;
sigma = start_points(fa, &nsigma);
E(sigma == NULL);
return -1;
}
+struct enum_intl {
+ int limit;
+ int nwords;
+ char **words;
+ char *buf;
+ size_t bsize;
+};
+
+static int fa_enumerate_intl(struct state *s, struct enum_intl *ei, int pos) {
+ int result = -1;
+
+ if (ei->bsize <= pos + 1) {
+ ei->bsize *= 2;
+ F(REALLOC_N(ei->buf, ei->bsize));
+ }
+
+ ei->buf[pos] = '\0';
+ for_each_trans(t, s) {
+ if (t->to->visited)
+ return -2;
+ t->to->visited = 1;
+ for (int i=t->min; i <= t->max; i++) {
+ ei->buf[pos] = i;
+ if (t->to->accept) {
+ if (ei->nwords >= ei->limit)
+ return -2;
+ ei->words[ei->nwords] = strdup(ei->buf);
+ E(ei->words[ei->nwords] == NULL);
+ ei->nwords += 1;
+ }
+ result = fa_enumerate_intl(t->to, ei, pos+1);
+ E(result < 0);
+ }
+ t->to->visited = 0;
+ }
+ ei->buf[pos] = '\0';
+ result = 0;
+ error:
+ return result;
+}
+
+int fa_enumerate(struct fa *fa, int limit, char ***words) {
+ struct enum_intl ei;
+ int result = -1;
+
+ *words = NULL;
+ MEMZERO(&ei, 1);
+ ei.bsize = 8; /* Arbitrary initial size */
+ ei.limit = limit;
+ F(ALLOC_N(ei.words, limit));
+ F(ALLOC_N(ei.buf, ei.bsize));
+
+ /* We use the visited bit to track which states we already visited
+ * during the construction of a word to detect loops */
+ list_for_each(s, fa->initial)
+ s->visited = 0;
+ fa->initial->visited = 1;
+ if (fa->initial->accept) {
+ if (ei.nwords >= limit)
+ return -2;
+ ei.words[0] = strdup("");
+ E(ei.words[0] == NULL);
+ ei.nwords = 1;
+ }
+ result = fa_enumerate_intl(fa->initial, &ei, 0);
+ E(result < 0);
+
+ result = ei.nwords;
+ *words = ei.words;
+ ei.words = NULL;
+ done:
+ free(ei.buf);
+ return result;
+
+ error:
+ for (int i=0; i < ei.nwords; i++)
+ free(ei.words[i]);
+ free(ei.words);
+ goto done;
+}
+
/* Expand the automaton FA by replacing every transition s(c) -> p from
* state s to p on character c by two transitions s(X) -> r, r(c) -> p via
* a new state r.
continue;
struct state *r = add_state(fa, 0);
+ if (r == NULL)
+ goto error;
r->trans = p->trans;
r->tused = p->tused;
r->tsize = p->tsize;
static bitset *alphabet(struct fa *fa) {
bitset *bs = bitset_init(UCHAR_NUM);
+ if (bs == NULL)
+ return NULL;
+
list_for_each(s, fa->initial) {
for (int i=0; i < s->tused; i++) {
for (uint c = s->trans[i].min; c <= s->trans[i].max; c++)
static bitset *last_chars(struct fa *fa) {
bitset *bs = bitset_init(UCHAR_NUM);
+ if (bs == NULL)
+ return NULL;
+
list_for_each(s, fa->initial) {
for (int i=0; i < s->tused; i++) {
if (s->trans[i].to->accept) {
bitset *bs = bitset_init(UCHAR_NUM);
struct state *s = fa->initial;
+ if (bs == NULL)
+ return NULL;
+
for (int i=0; i < s->tused; i++) {
for (uint c = s->trans[i].min; c <= s->trans[i].max; c++)
bitset_set(bs, c);
return bs;
}
-/* Return true if F1 and F2 are known to be unambiguously concatenable
- * according to simple heuristics. Return false if they need to be checked
- * further to decide ambiguity */
-static bool is_splittable(struct fa *fa1, struct fa *fa2) {
+/* Return 1 if F1 and F2 are known to be unambiguously concatenable
+ * according to simple heuristics. Return 0 if they need to be checked
+ * further to decide ambiguity
+ * Return -1 if an allocation fails
+ */
+static int is_splittable(struct fa *fa1, struct fa *fa2) {
bitset *alpha1 = NULL;
bitset *alpha2 = NULL;
bitset *last1 = NULL;
bitset *first2 = NULL;
- bool result = false;
+ bool result = -1;
alpha2 = alphabet(fa2);
last1 = last_chars(fa1);
+ if (alpha2 == NULL || last1 == NULL)
+ goto done;
if (bitset_disjoint(last1, alpha2, UCHAR_NUM)) {
- result = true;
+ result = 1;
goto done;
}
alpha1 = alphabet(fa1);
first2 = first_chars(fa2);
+ if (alpha1 == NULL || first2 == NULL)
+ goto done;
if (bitset_disjoint(first2, alpha1, UCHAR_NUM)) {
- result = true;
+ result = 1;
goto done;
}
+ result = 0;
done:
bitset_free(alpha1);
bitset_free(alpha2);
static const char X = '\001';
static const char Y = '\002';
char *result = NULL, *s = NULL;
+ size_t result_len = 0;
int ret = -1, r;
struct fa *mp = NULL, *ms = NULL, *sp = NULL, *ss = NULL, *amb = NULL;
struct fa *a1f = NULL, *a1t = NULL, *a2f = NULL, *a2t = NULL;
if (v != NULL)
*v = NULL;
- if (is_splittable(fa1, fa2))
+ r = is_splittable(fa1, fa2);
+ if (r < 0)
+ goto error;
+ if (r == 1)
return 0;
#define Xs "\001"
if (s != NULL) {
char *t;
- F(ALLOC_N(result, (strlen(s)-1)/2 + 1));
+ result_len = (s_len-1)/2 - 1;
+ F(ALLOC_N(result, result_len + 1));
t = result;
int i = 0;
- for (i=0; s[2*i] == X; i++)
+ for (i=0; s[2*i] == X; i++) {
+ assert((t - result) < result_len);
*t++ = s[2*i + 1];
+ }
if (pv != NULL)
*pv = t;
i += 1;
- for ( ;s[2*i] == X; i++)
+ for ( ;s[2*i] == X; i++) {
+ assert((t - result) < result_len);
*t++ = s[2*i + 1];
+ }
if (v != NULL)
*v = t;
i += 1;
- for (; 2*i+1 < strlen(s); i++)
+ for (; 2*i+1 < s_len; i++) {
+ assert((t - result) < result_len);
*t++ = s[2*i + 1];
+ }
}
ret = 0;
FREE(s);
*upv = result;
if (result != NULL)
- *upv_len = strlen(result);
+ *upv_len = result_len;
return ret;
error:
FREE(result);