Initial import to Gerrit.
[profile/ivi/festival.git] / src / arch / festival / viterbi.cc
1 /*************************************************************************/
2 /*                                                                       */
3 /*                Centre for Speech Technology Research                  */
4 /*                     University of Edinburgh, UK                       */
5 /*                         Copyright (c) 1999                            */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*                 Authors:  Alan W Black                                */
34 /*                 Date   :  February 1999                               */
35 /*-----------------------------------------------------------------------*/
36 /*  Generic Viterbi search specifications through scheme                 */
37 /*                                                                       */
38 /*=======================================================================*/
39 #include <cstdio>
40 #include "festival.h"
41 #include "lexicon.h"
42
43 static EST_VTCandidate *gv_candlist(EST_Item *s,EST_Features &f);
44 static EST_VTPath *gv_npath(EST_VTPath *p,EST_VTCandidate *c,EST_Features &f);
45 static double gv_find_wfst_prob(EST_VTPath *p,EST_WFST *wfst,
46                                 int n,int &state);
47 static double gv_find_ngram_prob(EST_VTPath *p,EST_Ngrammar *ngram,
48                                  int n,int &state,EST_Features &f);
49
50 LISP Gen_Viterbi(LISP utt)
51 {
52     // For each syllable predict intonation events.
53     EST_Utterance *u = utterance(utt);
54     LISP params = siod_get_lval("gen_vit_params","no gen_vit_params");
55     EST_Features f;
56     EST_WFST *wfst = 0;
57     EST_Ngrammar *ngram = 0;
58     int num_states;
59
60     // Add some defaults
61     f.set("gscale_s",1.0);
62     f.set("gscale_p",0.0);
63     f.set("Relation","Syllable");
64     f.set("return_feat","gen_vit_val");
65     lisp_to_features(params,f);
66
67     if (f.present("ngramname"))
68     {
69         ngram = get_ngram(f.S("ngramname"));
70         num_states = ngram->num_states();
71     }
72     else
73     {
74         wfst = get_wfst(f.S("wfstname"));
75         num_states = wfst->num_states();
76     }
77
78     EST_Viterbi_Decoder v(gv_candlist,gv_npath,num_states);
79     v.f = f;
80
81     v.initialise(u->relation(f.S("Relation")));
82     v.search();
83     v.result("gv_id");
84     if (f.present("debug"))
85     {
86         v.copy_feature("nprob");
87         v.copy_feature("prob");
88         v.copy_feature("score");
89         v.copy_feature("total_score");
90     }
91
92     // Map internal ids back to strings 
93     for (EST_Item *p=u->relation(f.S("Relation"))->head(); p != 0; p=p->next())
94         if (wfst == 0)
95             p->set(f.S("return_feat"),ngram->get_vocab_word(p->I("gv_id")));
96         else
97             p->set(f.S("return_feat"),wfst->in_symbol(p->I("gv_id")));
98
99     return utt;
100 }
101
102 static EST_VTCandidate *gv_candlist(EST_Item *s,EST_Features &f)
103 {
104     LISP p;
105     LISP l;
106     EST_VTCandidate *c;
107     EST_VTCandidate *all_c = 0;
108     EST_WFST *w = 0;
109     EST_Ngrammar *n = 0;
110     float prob;
111
112     // Call user function to get candidate probabilities
113     p = leval(cons(rintern(f.S("cand_function")),
114                    cons(siod(s),NIL)),NIL);
115     if (f.present("ngramname"))
116         n = get_ngram(f.S("ngramname"));
117     else
118         w = get_wfst(f.S("wfstname"));
119
120     for (l=p; l != NIL; l=cdr(l))
121     {
122         prob = get_c_float(car(cdr(car(l))));
123         if (f.present("debug"))
124             s->set(EST_String("cand_")+get_c_string(car(car(l))),prob);
125         if (prob != 0)
126         {
127             c = new EST_VTCandidate;
128             if (w == 0)
129                 c->name = n->get_vocab_word(get_c_string(car(car(l))));
130             else
131                 c->name = w->in_symbol(get_c_string(car(car(l))));
132             c->score = log(prob);
133             c->s = s;
134             c->next = all_c;
135             all_c = c;
136         }
137     }
138     return all_c;
139 }
140
141 static EST_VTPath *gv_npath(EST_VTPath *p,EST_VTCandidate *c,EST_Features &f)
142 {
143     EST_VTPath *np = new EST_VTPath;
144     double prob,lprob;
145     EST_WFST *wfst = 0;
146     EST_Ngrammar *ngram = 0;
147
148     if (f.present("ngramname"))
149         ngram = get_ngram(f.S("ngramname"));
150     else
151         wfst = get_wfst(f.S("wfstname"));
152
153     np->c = c;
154     np->from = p;
155     int n = c->name.Int();
156     if (wfst == 0)
157         prob = gv_find_ngram_prob(p,ngram,n,np->state,f);
158     else
159         prob = gv_find_wfst_prob(p,wfst,n,np->state);
160
161     prob = f.F("gscale_p") + (prob * (1-f.F("gscale_p")));
162
163     if (prob == 0)
164         lprob = log(0.00000001);
165     else
166         lprob = log(prob);
167     
168     if (p==0)
169         np->score = (c->score+lprob);
170     else
171         np->score = (c->score+lprob) + p->score;
172     
173     if (f.present("debug"))
174     {
175         np->f.set("prob",prob);
176         np->f.set("score",c->score);
177         np->f.set("nprob",prob*(exp(c->score)));
178         np->f.set("total_score",np->score);
179     }
180
181     return np;
182 }
183     
184 static double gv_find_wfst_prob(EST_VTPath *p,EST_WFST *wfst,
185                                 int n,int &state)
186 {
187     float prob;
188     int oldstate;
189
190     if (p == 0)
191         oldstate = wfst->start_state();
192     else
193         oldstate = p->state;
194     state = wfst->transition(oldstate,n,n,prob);
195     return prob;
196 }
197
198 static double gv_find_ngram_prob(EST_VTPath *p,EST_Ngrammar *ngram,
199                                  int n,int &state,EST_Features &f)
200 {
201     int oldstate=0;
202     double prob;
203
204     if (p == 0)
205     {
206         // This could be done once before the search is called
207         int order = ngram->order();
208         int i;
209         EST_IVector window(order);
210         
211         if (order > 1)
212             window.a_no_check(order-1) = n;
213         if (order > 2)
214             window.a_no_check(order-2) = 
215                 ngram->get_vocab_word(f.S("p_word"));
216         for (i = order-3; i>=0; i--)
217             window.a_no_check(i) =
218                 ngram->get_vocab_word(f.S("pp_word"));
219         oldstate = ngram->find_state_id(window);
220     }
221     else
222         oldstate = p->state;
223     state = ngram->find_next_state_id(oldstate,n);
224     const EST_DiscreteProbDistribution &pd = ngram->prob_dist(oldstate);
225     if (pd.samples() == 0)
226         prob = 0;
227     else
228         prob = (double)pd.probability(n);
229
230     return prob;
231 }
232