1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 * ====================================================================
38 * ms_mgau.c -- Essentially a wrapper that wraps gauden and
39 * senone. It supports multi-stream models.
42 * **********************************************
43 * CMU ARPA Speech Project
45 * Copyright (c) 1997 Carnegie Mellon University.
46 * ALL RIGHTS RESERVED.
47 * **********************************************
50 * Revision 1.2 2006/02/22 16:56:01 arthchan2003
51 * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: Added ms_mgau.[ch] into the trunk. It is a wrapper of ms_gauden and ms_senone
53 * Revision 1.1.2.4 2005/09/25 18:55:19 arthchan2003
54 * Added a flag to turn on and off precomputation.
56 * Revision 1.1.2.3 2005/08/03 18:53:44 dhdfu
57 * Add memory deallocation functions. Also move all the initialization
58 * of ms_mgau_model_t into ms_mgau_init (duh!), which entails removing it
59 * from decode_anytopo and friends.
61 * Revision 1.1.2.2 2005/08/02 21:05:38 arthchan2003
62 * 1, Added dist and mgau_active as intermediate variables for computation. 2, Added ms_cont_mgau_frame_eval, which is a multi-stream version of GMM computation, mainly for the s3.0 family of tools. 3, Fixed dox-doc.
64 * Revision 1.1.2.1 2005/07/20 19:37:09 arthchan2003
65 * Added a multi-stream cont_mgau (ms_mgau) which is a wrapper of both gauden and senone. Added ms_mgau_init and model_set_mllr. This allows eliminating 600 lines of code in decode_anytopo/align/allphone.
/*
 * Function table for the multi-stream GMM implementation in this file.
 * ms_mgau_init stores its address in the vt field of every model it sets
 * up.  Each entry below is labelled with the slot it fills.
 */
74 static ps_mgaufuncs_t ms_mgau_funcs = {
76 &ms_cont_mgau_frame_eval, /* frame_eval */
77 &ms_mgau_mllr_transform, /* transform */
78 &ms_mgau_free /* free */
/*
 * Set up a multi-stream model (ms_mgau_model_t) from the settings in config.
 *
 * Options read from config:
 *   "-mean", "-var", "-varfloor"        passed to gauden_init
 *   "-mixw", "-senmgau", "-mixwfloor"   passed to senone_init
 *   "-aw"                               stored in the senone's aw field
 *   "-topn"                             stored in msg->topn
 *
 * The senone set is checked against the gauden set: differing feature or
 * density counts, or senones needing more codebooks than are present, are
 * reported through E_FATAL; senones using fewer codebooks than are present
 * are reported through E_ERROR instead.
 *
 * msg->dist is allocated as an n_mgau x n_feat x topn array of
 * gauden_dist_t, and msg->mgau_active as one int8 per codebook.
 *
 * NOTE(review): E_FATAL presumably terminates the process -- confirm.
 */
82 ms_mgau_init(cmd_ln_t *config, logmath_t *lmath, bin_mdef_t *mdef)
90 msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t));
/* The gaussian densities are set up first because senone_init is handed msg->g. */
95 g = msg->g = gauden_init(cmd_ln_str_r(config, "-mean"),
96 cmd_ln_str_r(config, "-var"),
97 cmd_ln_float32_r(config, "-varfloor"),
99 s = msg->s = senone_init(msg->g,
100 cmd_ln_str_r(config, "-mixw"),
101 cmd_ln_str_r(config, "-senmgau"),
102 cmd_ln_float32_r(config, "-mixwfloor"),
105 s->aw = cmd_ln_int32_r(config, "-aw");
107 /* Verify senone parameters against gauden parameters */
108 if (s->n_feat != g->n_feat)
109 E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat,
111 if (s->n_cw != g->n_density)
112 E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n",
113 g->n_density, s->n_cw);
114 if (s->n_gauden > g->n_mgau)
115 E_FATAL("Senones need more codebooks (%d) than present (%d)\n",
116 s->n_gauden, g->n_mgau);
117 if (s->n_gauden < g->n_mgau)
118 E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n",
119 s->n_gauden, g->n_mgau);
/* A topn of 0, or one larger than the density count, is replaced by the density count. */
121 msg->topn = cmd_ln_int32_r(config, "-topn");
122 E_INFO("The value of topn: %d\n", msg->topn);
123 if (msg->topn == 0 || msg->topn > msg->g->n_density) {
125 ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n",
126 msg->topn, msg->g->n_density);
127 msg->topn = msg->g->n_density;
130 msg->dist = (gauden_dist_t ***)
131 ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn,
132 sizeof(gauden_dist_t));
133 msg->mgau_active = ckd_calloc(g->n_mgau, sizeof(int8));
/*
 * The model is accessed through a ps_mgau_t pointer carrying this file's
 * function table.
 * NOTE(review): the cast presumably relies on ps_mgau_t being the first
 * member of ms_mgau_model_t -- confirm against the struct definition.
 */
135 mg = (ps_mgau_t *)msg;
136 mg->vt = &ms_mgau_funcs;
/*
 * "free" entry of ms_mgau_funcs.  mg is reinterpreted as the
 * ms_mgau_model_t that ms_mgau_init set up; the dist array is released
 * with ckd_free_3d and the mgau_active flag array with ckd_free.
 */
141 ms_mgau_free(ps_mgau_t * mg)
143 ms_mgau_model_t *msg = (ms_mgau_model_t *)mg;
149 ckd_free_3d((void *) msg->dist);
150 ckd_free(msg->mgau_active);
/*
 * "transform" entry of ms_mgau_funcs.  Forwards mllr to
 * gauden_mllr_transform together with the model's gauden (msg->g) and
 * msg->config, and returns that call's result unchanged.
 */
155 ms_mgau_mllr_transform(ps_mgau_t *s,
158 ms_mgau_model_t *msg = (ms_mgau_model_t *)s;
159 return gauden_mllr_transform(msg->g, mllr, msg->config);
163 ms_cont_mgau_frame_eval(ps_mgau_t * mg,
165 uint8 *senone_active,
166 int32 n_senone_active,
171 ms_mgau_model_t *msg = (ms_mgau_model_t *)mg;
178 topn = ms_mgau_topn(msg);
179 g = ms_mgau_gauden(msg);
180 sen = ms_mgau_senone(msg);
185 for (gid = 0; gid < g->n_mgau; gid++)
186 gauden_dist(g, gid, topn, feat, msg->dist[gid]);
188 best = (int32) 0x7fffffff;
189 for (s = 0; s < sen->n_sen; s++) {
190 senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn);
191 if (best > senscr[s]) {
196 /* Normalize senone scores */
197 for (s = 0; s < sen->n_sen; s++) {
198 int32 bs = senscr[s] - best;
208 /* Flag all active mixture-gaussian codebooks */
209 for (gid = 0; gid < g->n_mgau; gid++)
210 msg->mgau_active[gid] = 0;
213 for (i = 0; i < n_senone_active; i++) {
214 /* senone_active consists of deltas. */
215 int32 s = senone_active[i] + n;
216 msg->mgau_active[sen->mgau[s]] = 1;
220 /* Compute topn gaussian density values (for active codebooks) */
221 for (gid = 0; gid < g->n_mgau; gid++) {
222 if (msg->mgau_active[gid])
223 gauden_dist(g, gid, topn, feat, msg->dist[gid]);
226 best = (int32) 0x7fffffff;
228 for (i = 0; i < n_senone_active; i++) {
229 int32 s = senone_active[i] + n;
230 senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn);
231 if (best > senscr[s]) {
237 /* Normalize senone scores */
239 for (i = 0; i < n_senone_active; i++) {
240 int32 s = senone_active[i] + n;
241 int32 bs = senscr[s] - best;