1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3 * Copyright (c) 2007 Carnegie Mellon University. All rights
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 * ====================================================================
41 #include "sphinxbase/ckd_alloc.h"
42 #include "sphinxbase/strfuncs.h"
43 #include "sphinxbase/hash_table.h"
44 #include "sphinxbase/err.h"
46 #include "jsgf_internal.h"
47 #include "jsgf_parser.h"
48 #include "jsgf_scanner.h"
50 int yyparse (yyscan_t yyscanner, jsgf_t *jsgf);
55 * This file implements the data structures for parsing JSGF grammars
56 * into Sphinx finite-state grammars.
60 jsgf_atom_new(char *name, float weight)
64 atom = ckd_calloc(1, sizeof(*atom));
65 atom->name = ckd_salloc(name);
66 atom->weight = weight;
71 jsgf_atom_free(jsgf_atom_t *atom)
81 jsgf_grammar_new(jsgf_t *parent)
85 grammar = ckd_calloc(1, sizeof(*grammar));
86 /* If this is an imported/subgrammar, then we will share a global
87 * namespace with the parent grammar. */
89 grammar->rules = parent->rules;
90 grammar->imports = parent->imports;
91 grammar->searchpath = parent->searchpath;
92 grammar->parent = parent;
97 grammar->rules = hash_table_new(64, 0);
98 grammar->imports = hash_table_new(16, 0);
100 /* Silvio Moioli: no getenv() in Windows CE */
101 #if !defined(_WIN32_WCE)
102 if ((jsgf_path = getenv("JSGF_PATH")) != NULL) {
105 /* FIXME: This should be a function in libsphinxbase. */
106 /* FIXME: Also nextword() is totally useless... */
107 word = jsgf_path = ckd_salloc(jsgf_path);
108 while ((c = strchr(word, ':'))) {
110 grammar->searchpath = glist_add_ptr(grammar->searchpath, word);
113 grammar->searchpath = glist_add_ptr(grammar->searchpath, word);
114 grammar->searchpath = glist_reverse(grammar->searchpath);
117 /* Default to current directory. */
118 grammar->searchpath = glist_add_ptr(grammar->searchpath, ckd_salloc("."));
127 jsgf_grammar_free(jsgf_t *jsgf)
129 /* FIXME: Probably should just use refcounting instead. */
130 if (jsgf->parent == NULL) {
134 for (itor = hash_table_iter(jsgf->rules); itor;
135 itor = hash_table_iter_next(itor)) {
136 ckd_free((char *)itor->ent->key);
137 jsgf_rule_free((jsgf_rule_t *)itor->ent->val);
139 hash_table_free(jsgf->rules);
140 for (itor = hash_table_iter(jsgf->imports); itor;
141 itor = hash_table_iter_next(itor)) {
142 ckd_free((char *)itor->ent->key);
143 jsgf_grammar_free((jsgf_t *)itor->ent->val);
145 hash_table_free(jsgf->imports);
146 for (gn = jsgf->searchpath; gn; gn = gnode_next(gn))
147 ckd_free(gnode_ptr(gn));
148 glist_free(jsgf->searchpath);
149 for (gn = jsgf->links; gn; gn = gnode_next(gn))
150 ckd_free(gnode_ptr(gn));
151 glist_free(jsgf->links);
153 ckd_free(jsgf->name);
154 ckd_free(jsgf->version);
155 ckd_free(jsgf->charset);
156 ckd_free(jsgf->locale);
161 jsgf_rhs_free(jsgf_rhs_t *rhs)
168 jsgf_rhs_free(rhs->alt);
169 for (gn = rhs->atoms; gn; gn = gnode_next(gn))
170 jsgf_atom_free(gnode_ptr(gn));
171 glist_free(rhs->atoms);
176 jsgf_kleene_new(jsgf_t *jsgf, jsgf_atom_t *atom, int plus)
179 jsgf_atom_t *rule_atom;
182 /* Generate an "internal" rule of the form (<NULL> | <name> <g0006>) */
183 /* Or if plus is true, (<name> | <name> <g0006>) */
184 rhs = ckd_calloc(1, sizeof(*rhs));
186 rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new(atom->name, 1.0));
188 rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new("<NULL>", 1.0));
189 rule = jsgf_define_rule(jsgf, NULL, rhs, 0);
190 rule_atom = jsgf_atom_new(rule->name, 1.0);
191 rhs = ckd_calloc(1, sizeof(*rhs));
192 rhs->atoms = glist_add_ptr(NULL, rule_atom);
193 rhs->atoms = glist_add_ptr(rhs->atoms, atom);
194 rule->rhs->alt = rhs;
196 return jsgf_atom_new(rule->name, 1.0);
200 jsgf_optional_new(jsgf_t *jsgf, jsgf_rhs_t *exp)
202 jsgf_rhs_t *rhs = ckd_calloc(1, sizeof(*rhs));
203 jsgf_atom_t *atom = jsgf_atom_new("<NULL>", 1.0);
205 rhs->atoms = glist_add_ptr(NULL, atom);
206 return jsgf_define_rule(jsgf, NULL, rhs, 0);
210 jsgf_add_link(jsgf_t *grammar, jsgf_atom_t *atom, int from, int to)
214 link = ckd_calloc(1, sizeof(*link));
218 grammar->links = glist_add_ptr(grammar->links, link);
222 extract_grammar_name(char *rule_name)
225 char* grammar_name = ckd_salloc(rule_name+1);
226 if ((dot_pos = strrchr(grammar_name + 1, '.')) == NULL) {
227 ckd_free(grammar_name);
235 jsgf_fullname(jsgf_t *jsgf, const char *name)
239 /* Check if it is already qualified */
240 if (strchr(name + 1, '.'))
241 return ckd_salloc(name);
243 /* Skip leading < in name */
244 fullname = ckd_malloc(strlen(jsgf->name) + strlen(name) + 4);
245 sprintf(fullname, "<%s.%s", jsgf->name, name + 1);
250 jsgf_fullname_from_rule(jsgf_rule_t *rule, const char *name)
252 char *fullname, *grammar_name;
254 /* Check if it is already qualified */
255 if (strchr(name + 1, '.'))
256 return ckd_salloc(name);
258 /* Skip leading < in name */
259 if ((grammar_name = extract_grammar_name(rule->name)) == NULL)
260 return ckd_salloc(name);
261 fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4);
262 sprintf(fullname, "<%s.%s", grammar_name, name + 1);
263 ckd_free(grammar_name);
268 /* Extract as rulename everything after the secondlast dot, if existent.
269 * Because everything before the secondlast dot is the path-specification. */
271 importname2rulename(char *importname)
273 char *rulename = ckd_salloc(importname);
275 char *secondlast_dotpos;
277 if ((last_dotpos = strrchr(rulename+1, '.')) != NULL) {
279 if ((secondlast_dotpos = strrchr(rulename+1, '.')) != NULL) {
281 *secondlast_dotpos='<';
282 secondlast_dotpos = ckd_salloc(secondlast_dotpos);
284 return secondlast_dotpos;
296 static int expand_rule(jsgf_t *grammar, jsgf_rule_t *rule);
298 expand_rhs(jsgf_t *grammar, jsgf_rule_t *rule, jsgf_rhs_t *rhs)
303 /* Last node expanded in this sequence. */
304 lastnode = rule->entry;
306 /* Iterate over atoms in rhs and generate links/nodes */
307 for (gn = rhs->atoms; gn; gn = gnode_next(gn)) {
308 jsgf_atom_t *atom = gnode_ptr(gn);
309 if (jsgf_atom_is_rule(atom)) {
310 jsgf_rule_t *subrule;
315 /* Special case for <NULL> and <VOID> pseudo-rules */
316 if (0 == strcmp(atom->name, "<NULL>")) {
317 /* Emit a NULL transition */
318 jsgf_add_link(grammar, atom,
319 lastnode, grammar->nstate);
320 lastnode = grammar->nstate;
324 else if (0 == strcmp(atom->name, "<VOID>")) {
325 /* Make this entire RHS unspeakable */
329 fullname = jsgf_fullname_from_rule(rule, atom->name);
330 if (hash_table_lookup(grammar->rules, fullname, &val) == -1) {
331 E_ERROR("Undefined rule in RHS: %s\n", fullname);
337 /* Look for this in the stack of expanded rules */
338 for (subnode = grammar->rulestack; subnode; subnode = gnode_next(subnode))
339 if (gnode_ptr(subnode) == (void *)subrule)
341 if (subnode != NULL) {
342 /* Allow right-recursion only. */
343 if (gnode_next(gn) != NULL) {
344 E_ERROR("Only right-recursion is permitted (in %s.%s)\n",
345 grammar->name, rule->name);
348 /* Add a link back to the beginning of this rule instance */
349 E_INFO("Right recursion %s %d => %d\n", atom->name, lastnode, subrule->entry);
350 jsgf_add_link(grammar, atom, lastnode, subrule->entry);
353 /* Expand the subrule */
354 if (expand_rule(grammar, subrule) == -1)
356 /* Add a link into the subrule. */
357 jsgf_add_link(grammar, atom,
358 lastnode, subrule->entry);
359 lastnode = subrule->exit;
363 /* Add a link for this token and create a new exit node. */
364 jsgf_add_link(grammar, atom,
365 lastnode, grammar->nstate);
366 lastnode = grammar->nstate;
375 expand_rule(jsgf_t *grammar, jsgf_rule_t *rule)
380 /* Push this rule onto the stack */
381 grammar->rulestack = glist_add_ptr(grammar->rulestack, rule);
383 /* Normalize weights for all alternatives exiting rule->entry */
385 for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
387 jsgf_atom_t *atom = gnode_ptr(rhs->atoms);
388 norm += atom->weight;
392 rule->entry = grammar->nstate++;
393 rule->exit = grammar->nstate++;
394 if (norm == 0) norm = 1;
395 for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
399 jsgf_atom_t *atom = gnode_ptr(rhs->atoms);
400 atom->weight /= norm;
402 lastnode = expand_rhs(grammar, rule, rhs);
403 if (lastnode == -1) {
407 jsgf_add_link(grammar, NULL, lastnode, rule->exit);
411 /* Pop this rule from the rule stack */
412 grammar->rulestack = gnode_free(grammar->rulestack, NULL);
417 jsgf_rule_iter(jsgf_t *grammar)
419 return hash_table_iter(grammar->rules);
423 jsgf_get_rule(jsgf_t *grammar, char const *name)
427 if (hash_table_lookup(grammar->rules, name, &val) < 0)
429 return (jsgf_rule_t *)val;
433 jsgf_rule_name(jsgf_rule_t *rule)
439 jsgf_rule_public(jsgf_rule_t *rule)
445 jsgf_build_fsg_internal(jsgf_t *grammar, jsgf_rule_t *rule,
446 logmath_t *lmath, float32 lw, int do_closure)
452 /* Clear previous links */
453 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
454 ckd_free(gnode_ptr(gn));
456 glist_free(grammar->links);
457 grammar->links = NULL;
458 rule->entry = rule->exit = 0;
460 expand_rule(grammar, rule);
462 fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
463 fsg->start_state = rule->entry;
464 fsg->final_state = rule->exit;
465 grammar->links = glist_reverse(grammar->links);
466 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
467 jsgf_link_t *link = gnode_ptr(gn);
470 if (jsgf_atom_is_rule(link->atom)) {
471 fsg_model_null_trans_add(fsg, link->from, link->to,
472 logmath_log(lmath, link->atom->weight));
475 int wid = fsg_model_word_add(fsg, link->atom->name);
476 fsg_model_trans_add(fsg, link->from, link->to,
477 logmath_log(lmath, link->atom->weight), wid);
481 fsg_model_null_trans_add(fsg, link->from, link->to, 0);
485 nulls = fsg_model_null_trans_closure(fsg, NULL);
493 jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule,
494 logmath_t *lmath, float32 lw)
496 return jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE);
500 jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule,
501 logmath_t *lmath, float32 lw)
503 return jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE);
507 jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh)
510 logmath_t *lmath = logmath_init(1.0001, 0, 0);
512 if ((fsg = jsgf_build_fsg_raw(grammar, rule, lmath, 1.0)) == NULL)
515 fsg_model_write(fsg, outfh);
524 jsgf_define_rule(jsgf_t *jsgf, char *name, jsgf_rhs_t *rhs, int public)
530 name = ckd_malloc(strlen(jsgf->name) + 16);
531 sprintf(name, "<%s.g%05d>", jsgf->name, hash_table_inuse(jsgf->rules));
536 newname = jsgf_fullname(jsgf, name);
540 rule = ckd_calloc(1, sizeof(*rule));
542 rule->name = ckd_salloc(name);
544 rule->public = public;
546 E_INFO("Defined rule: %s%s\n",
547 rule->public ? "PUBLIC " : "",
549 val = hash_table_enter(jsgf->rules, name, rule);
550 if (val != (void *)rule) {
551 E_WARN("Multiply defined symbol: %s\n", name);
557 jsgf_rule_retain(jsgf_rule_t *rule)
564 jsgf_rule_free(jsgf_rule_t *rule)
568 if (--rule->refcnt > 0)
570 jsgf_rhs_free(rule->rhs);
571 ckd_free(rule->name);
577 /* FIXME: This should go in libsphinxutil */
579 path_list_search(glist_t paths, char *path)
583 for (gn = paths; gn; gn = gnode_next(gn)) {
587 fullpath = string_join(gnode_ptr(gn), "/", path, NULL);
588 tmp = fopen(fullpath, "r");
600 jsgf_import_rule(jsgf_t *jsgf, char *name)
602 char *c, *path, *newpath;
603 size_t namelen, packlen;
608 /* Trim the leading and trailing <> */
609 namelen = strlen(name);
610 path = ckd_malloc(namelen - 2 + 6); /* room for a trailing .gram */
611 strcpy(path, name + 1);
612 /* Split off the first part of the name */
613 c = strrchr(path, '.');
615 E_ERROR("Imported rule is not qualified: %s\n", name);
622 /* Look for import foo.* */
623 import_all = (strlen(name) > 2 && 0 == strcmp(name + namelen - 3, ".*>"));
625 /* Construct a filename. */
626 for (c = path; *c; ++c)
627 if (*c == '.') *c = '/';
628 strcat(path, ".gram");
629 newpath = path_list_search(jsgf->searchpath, path);
635 E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name);
637 /* FIXME: Also, we need to make sure that path is fully qualified
638 * here, by adding any prefixes from jsgf->name to it. */
639 /* See if we have parsed it already */
640 if (hash_table_lookup(jsgf->imports, path, &val) == 0) {
641 E_INFO("Already imported %s\n", path);
646 /* If not, parse it. */
647 imp = jsgf_parse_file(path, jsgf);
648 val = hash_table_enter(jsgf->imports, path, imp);
649 if (val != (void *)imp) {
650 E_WARN("Multiply imported file: %s\n", path);
655 /* Look for public rules matching rulename. */
656 for (itor = hash_table_iter(imp->rules); itor;
657 itor = hash_table_iter_next(itor)) {
658 hash_entry_t *he = itor->ent;
659 jsgf_rule_t *rule = hash_entry_val(he);
661 char *rule_name = importname2rulename(name);
664 /* Match package name (symbol table is shared) */
665 rule_matches = !strncmp(rule_name, rule->name, packlen + 1);
669 rule_matches = !strcmp(rule_name, rule->name);
672 if (rule->public && rule_matches) {
676 /* Link this rule into the current namespace. */
677 c = strrchr(rule->name, '.');
679 newname = jsgf_fullname(jsgf, c);
681 E_INFO("Imported %s\n", newname);
682 val = hash_table_enter(jsgf->rules, newname,
683 jsgf_rule_retain(rule));
684 if (val != (void *)rule) {
685 E_WARN("Multiply defined symbol: %s\n", newname);
688 hash_table_iter_free(itor);
699 jsgf_parse_file(const char *filename, jsgf_t *parent)
706 yylex_init(&yyscanner);
707 if (filename == NULL) {
708 yyset_in(stdin, yyscanner);
711 in = fopen(filename, "r");
713 fprintf(stderr, "Failed to open %s for parsing: %s\n",
714 filename, strerror(errno));
717 yyset_in(in, yyscanner);
720 jsgf = jsgf_grammar_new(parent);
721 yyrv = yyparse(yyscanner, jsgf);
723 fprintf(stderr, "JSGF parse of %s failed\n",
724 filename ? filename : "(stdin)");
725 jsgf_grammar_free(jsgf);
726 yylex_destroy(yyscanner);
731 yylex_destroy(yyscanner);