1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 * ====================================================================
38 * feat.c -- Feature vector description and cepstra->feature computation.
40 * **********************************************
41 * CMU ARPA Speech Project
43 * Copyright (c) 1996 Carnegie Mellon University.
44 * ALL RIGHTS RESERVED.
45 * **********************************************
49 * Revision 1.22 2006/02/23 03:59:40 arthchan2003
50 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc.
52 * Revision 1.21.4.3 2005/10/17 04:45:57 arthchan2003
53 * Free stuff in cmn and feat correctly.
55 * Revision 1.21.4.2 2005/09/26 02:19:57 arthchan2003
56 * Add message to show the directory in which the feature file is searched for.
58 * Revision 1.21.4.1 2005/07/03 22:55:50 arthchan2003
59 * More correct deallocation in feat.c. The cmn deallocation is still not correct at this point.
61 * Revision 1.21 2005/06/22 03:29:35 arthchan2003
62 * Makefile.am files for all subdirectories of libs3decoder/
64 * Revision 1.4 2005/04/21 23:50:26 archan
65 * Some more refactoring of how the reporting of structures inside kbcore_t is done; it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used.
67 * Revision 1.3 2005/03/30 01:22:46 archan
68 * Fixed mistakes in last updates. Add
71 * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
72 * Adding feat_free() to free allocated memory
74 * 02-Jan-2001 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
75 * Modified feat_s2mfc2feat_block() to handle empty buffers at
76 * the end of an utterance
78 * 30-Dec-2000 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
79 * Added feat_s2mfc2feat_block() to allow feature computation
80 * from sequences of blocks of cepstral vectors
82 * 12-Jun-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
83 * Major changes to accommodate arbitrary feature input types. Added
84 * feat_read(), moved various cep2feat functions from other files into
85 * this one. Also, made this module object-oriented with the feat_t type.
86 * Changed definition of s2mfc_read to let the caller manage MFC buffers.
88 * 03-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
89 * Added unistd.h include.
91 * 02-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
92 * Added check for sf argument to s2mfc_read being within file size.
94 * 18-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
95 * Added sf, ef parameters to s2mfc_read().
97 * 10-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
98 * Added feat_cepsize().
99 * Added different feature-handling (s2_4x, s3_1x39 at this point).
100 * Moved feature-dependent functions to feature-dependent files.
102 * 09-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
103 * Moved constant declarations from feat.h into here.
105 * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
111 * This module encapsulates different feature streams used by the Sphinx group. New
112 * stream types can be added by augmenting feat_init() and providing an accompanying
113 * compute_feat function. It also provides a "generic" feature vector definition for
114 * handling "arbitrary" speech input feature types (see the last section in feat_init()).
115 * In this case the speech input data should already be feature vectors; no computation,
116 * such as MFC->feature conversion, is available or needed.
126 #pragma warning (disable: 4244 4996)
129 #include "sphinxbase/fe.h"
130 #include "sphinxbase/feat.h"
131 #include "sphinxbase/bio.h"
132 #include "sphinxbase/pio.h"
133 #include "sphinxbase/cmn.h"
134 #include "sphinxbase/agc.h"
135 #include "sphinxbase/err.h"
136 #include "sphinxbase/ckd_alloc.h"
137 #include "sphinxbase/prim_type.h"
138 #include "sphinxbase/glist.h"
140 #define FEAT_VERSION "1.0"
141 #define FEAT_DCEP_WIN 2
145 cep_dump_dbg(feat_t *fcb, mfcc_t **mfc, int32 nfr, const char *text)
149 E_INFO("%s\n", text);
150 for (i = 0; i < nfr; i++) {
151 for (j = 0; j < fcb->cepsize; j++) {
152 fprintf(stderr, "%f ", MFCC2FLOAT(mfc[i][j]));
154 fprintf(stderr, "\n");
158 feat_print_dbg(feat_t *fcb, mfcc_t ***feat, int32 nfr, const char *text)
160 E_INFO("%s\n", text);
161 feat_print(fcb, feat, nfr, stderr);
163 #else /* !DUMP_FEATURES */
164 #define cep_dump_dbg(fcb,mfc,nfr,text)
165 #define feat_print_dbg(fcb,mfc,nfr,text)
169 parse_subvecs(char const *str)
173 glist_t dimlist; /* List of dimensions in one subvector */
174 glist_t veclist; /* List of dimlists (subvectors) */
185 if (sscanf(strp, "%d%n", &n, &l) != 1)
186 E_FATAL("'%s': Couldn't read int32 @pos %d\n", str,
193 if (sscanf(strp, "%d%n", &n2, &l) != 1)
194 E_FATAL("'%s': Couldn't read int32 @pos %d\n", str,
201 if ((n < 0) || (n > n2))
202 E_FATAL("'%s': Bad subrange spec ending @pos %d\n", str,
205 for (; n <= n2; n++) {
207 for (gn = dimlist; gn; gn = gnode_next(gn))
208 if (gnode_int32(gn) == n)
211 E_FATAL("'%s': Duplicate dimension ending @pos %d\n",
214 dimlist = glist_add_int32(dimlist, n);
217 if ((*strp == '\0') || (*strp == '/'))
221 E_FATAL("'%s': Bad delimiter @pos %d\n", str, strp - str);
226 veclist = glist_add_ptr(veclist, (void *) dimlist);
231 assert(*strp == '/');
235 /* Convert the glists to arrays; remember the glists are in reverse order of the input! */
236 n = glist_count(veclist); /* #Subvectors */
237 subvec = (int32 **) ckd_calloc(n + 1, sizeof(int32 *)); /* +1 for sentinel */
238 subvec[n] = NULL; /* sentinel */
240 for (--n, gn = veclist; (n >= 0) && gn; gn = gnode_next(gn), --n) {
241 gn2 = (glist_t) gnode_ptr(gn);
243 n2 = glist_count(gn2); /* Length of this subvector */
245 E_FATAL("'%s': 0-length subvector\n", str);
247 subvec[n] = (int32 *) ckd_calloc(n2 + 1, sizeof(int32)); /* +1 for sentinel */
248 subvec[n][n2] = -1; /* sentinel */
250 for (--n2; (n2 >= 0) && gn2; gn2 = gnode_next(gn2), --n2)
251 subvec[n][n2] = gnode_int32(gn2);
252 assert((n2 < 0) && (!gn2));
254 assert((n < 0) && (!gn));
256 /* Free the glists */
257 for (gn = veclist; gn; gn = gnode_next(gn)) {
258 gn2 = (glist_t) gnode_ptr(gn);
267 subvecs_free(int32 **subvecs)
271 for (sv = subvecs; sv && *sv; ++sv)
277 feat_set_subvecs(feat_t *fcb, int32 **subvecs)
280 int32 n_sv, n_dim, i;
282 if (subvecs == NULL) {
283 subvecs_free(fcb->subvecs);
284 ckd_free(fcb->sv_buf);
285 ckd_free(fcb->sv_len);
294 if (fcb->n_stream != 1) {
295 E_ERROR("Subvector specifications require single-stream features!");
301 for (sv = subvecs; sv && *sv; ++sv) {
304 for (d = *sv; d && *d != -1; ++d) {
309 if (n_dim > feat_dimension(fcb)) {
310 E_ERROR("Total dimensionality of subvector specification %d "
311 "> feature dimensionality %d\n", n_dim, feat_dimension(fcb));
316 fcb->subvecs = subvecs;
317 fcb->sv_len = ckd_calloc(n_sv, sizeof(*fcb->sv_len));
318 fcb->sv_buf = ckd_calloc(n_dim, sizeof(*fcb->sv_buf));
320 for (i = 0; i < n_sv; ++i) {
322 for (d = subvecs[i]; d && *d != -1; ++d) {
331 * Project feature components to subvectors (if any).
334 feat_subvec_project(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
338 if (fcb->subvecs == NULL)
340 for (i = 0; i < nfr; ++i) {
345 for (j = 0; j < fcb->n_sv; ++j) {
347 for (d = fcb->subvecs[j]; d && *d != -1; ++d) {
348 *out++ = inout_feat[i][0][*d];
351 memcpy(inout_feat[i][0], fcb->sv_buf, fcb->sv_dim * sizeof(*fcb->sv_buf));
356 feat_array_alloc(feat_t * fcb, int32 nfr)
359 mfcc_t *data, *d, ***feat;
363 assert(feat_dimension(fcb) > 0);
365 /* Make sure to use the dimensionality of the features *before*
366 LDA and subvector projection. */
368 for (i = 0; i < fcb->n_stream; ++i)
369 k += fcb->stream_len[i];
370 assert(k >= feat_dimension(fcb));
371 assert(k >= fcb->sv_dim);
374 (mfcc_t ***) ckd_calloc_2d(nfr, feat_dimension1(fcb), sizeof(mfcc_t *));
375 data = (mfcc_t *) ckd_calloc(nfr * k, sizeof(mfcc_t));
377 for (i = 0; i < nfr; i++) {
379 for (j = 0; j < feat_dimension1(fcb); j++) {
381 d += feat_dimension2(fcb, j);
389 feat_array_free(mfcc_t ***feat)
391 ckd_free(feat[0][0]);
392 ckd_free_2d((void **)feat);
396 feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
400 mfcc_t *w1, *w_1, *_w1, *_w_1;
405 assert(feat_cepsize(fcb) == 13);
406 assert(feat_n_stream(fcb) == 4);
407 assert(feat_stream_len(fcb, 0) == 12);
408 assert(feat_stream_len(fcb, 1) == 24);
409 assert(feat_stream_len(fcb, 2) == 3);
410 assert(feat_stream_len(fcb, 3) == 12);
411 assert(feat_window_size(fcb) == 4);
414 memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));
417 * DCEP(SHORT): mfc[2] - mfc[-2]
418 * DCEP(LONG): mfc[4] - mfc[-4]
420 w = mfc[2] + 1; /* +1 to skip C0 */
424 for (i = 0; i < feat_cepsize(fcb) - 1; i++) /* Short-term */
427 w = mfc[4] + 1; /* +1 to skip C0 */
430 for (j = 0; j < feat_cepsize(fcb) - 1; i++, j++) /* Long-term */
433 /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
434 w1 = mfc[3] + 1; /* Final +1 to skip C0 */
440 for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
442 d2 = w_1[i] - _w_1[i];
447 /* POW: C0, DC0, D2C0; differences computed as above for rest of cep */
450 f[1] = mfc[2][0] - mfc[-2][0];
452 d1 = mfc[3][0] - mfc[-1][0];
453 d2 = mfc[1][0] - mfc[-3][0];
459 feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
463 mfcc_t *w1, *w_1, *_w1, *_w_1;
468 assert(feat_cepsize(fcb) == 13);
469 assert(feat_n_stream(fcb) == 1);
470 assert(feat_stream_len(fcb, 0) == 39);
471 assert(feat_window_size(fcb) == 3);
474 memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));
476 * DCEP: mfc[2] - mfc[-2];
478 f = feat[0] + feat_cepsize(fcb) - 1;
479 w = mfc[2] + 1; /* +1 to skip C0 */
482 for (i = 0; i < feat_cepsize(fcb) - 1; i++)
485 /* POW: C0, DC0, D2C0 */
486 f += feat_cepsize(fcb) - 1;
489 f[1] = mfc[2][0] - mfc[-2][0];
491 d1 = mfc[3][0] - mfc[-1][0];
492 d2 = mfc[1][0] - mfc[-3][0];
495 /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
498 w1 = mfc[3] + 1; /* Final +1 to skip C0 */
503 for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
505 d2 = w_1[i] - _w_1[i];
513 feat_s3_cep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
516 assert(feat_n_stream(fcb) == 1);
517 assert(feat_window_size(fcb) == 0);
520 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
524 feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
531 assert(feat_n_stream(fcb) == 1);
532 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2);
533 assert(feat_window_size(fcb) == 2);
536 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
539 * DCEP: mfc[2] - mfc[-2];
541 f = feat[0] + feat_cepsize(fcb);
545 for (i = 0; i < feat_cepsize(fcb); i++)
550 feat_1s_c_d_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
554 mfcc_t *w1, *w_1, *_w1, *_w_1;
559 assert(feat_n_stream(fcb) == 1);
560 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 3);
561 assert(feat_window_size(fcb) == FEAT_DCEP_WIN + 1);
564 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
567 * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN;
569 f = feat[0] + feat_cepsize(fcb);
570 w = mfc[FEAT_DCEP_WIN];
571 _w = mfc[-FEAT_DCEP_WIN];
573 for (i = 0; i < feat_cepsize(fcb); i++)
577 * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]),
578 * where w = FEAT_DCEP_WIN
580 f += feat_cepsize(fcb);
582 w1 = mfc[FEAT_DCEP_WIN + 1];
583 _w1 = mfc[-FEAT_DCEP_WIN + 1];
584 w_1 = mfc[FEAT_DCEP_WIN - 1];
585 _w_1 = mfc[-FEAT_DCEP_WIN - 1];
587 for (i = 0; i < feat_cepsize(fcb); i++) {
589 d2 = w_1[i] - _w_1[i];
596 feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
600 mfcc_t *w1, *w_1, *_w1, *_w_1;
605 assert(feat_n_stream(fcb) == 1);
606 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4);
607 assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2);
610 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
613 * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN;
615 f = feat[0] + feat_cepsize(fcb);
616 w = mfc[FEAT_DCEP_WIN];
617 _w = mfc[-FEAT_DCEP_WIN];
619 for (i = 0; i < feat_cepsize(fcb); i++)
623 * LDCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN * 2;
625 f += feat_cepsize(fcb);
626 w = mfc[FEAT_DCEP_WIN * 2];
627 _w = mfc[-FEAT_DCEP_WIN * 2];
629 for (i = 0; i < feat_cepsize(fcb); i++)
633 * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]),
634 * where w = FEAT_DCEP_WIN
636 f += feat_cepsize(fcb);
638 w1 = mfc[FEAT_DCEP_WIN + 1];
639 _w1 = mfc[-FEAT_DCEP_WIN + 1];
640 w_1 = mfc[FEAT_DCEP_WIN - 1];
641 _w_1 = mfc[-FEAT_DCEP_WIN - 1];
643 for (i = 0; i < feat_cepsize(fcb); i++) {
645 d2 = w_1[i] - _w_1[i];
652 feat_copy(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
656 win = feat_window_size(fcb);
658 /* Concatenate input features */
659 for (i = -win; i <= win; ++i) {
662 for (j = 0; j < feat_n_stream(fcb); ++j) {
665 /* Unscale the stream length by the window. */
666 stream_len = feat_stream_len(fcb, j) / (2 * win + 1);
667 memcpy(feat[j] + ((i + win) * stream_len),
669 stream_len * sizeof(mfcc_t));
676 feat_init(char const *type, cmn_type_t cmn, int32 varnorm,
677 agc_type_t agc, int32 breport, int32 cepsize)
685 ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n",
686 type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]);
688 fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t));
690 fcb->name = (char *) ckd_salloc(type);
691 if (strcmp(type, "s2_4x") == 0) {
692 /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */
694 E_ERROR("s2_4x features require cepsize == 13\n");
700 fcb->stream_len = (int32 *) ckd_calloc(4, sizeof(int32));
701 fcb->stream_len[0] = 12;
702 fcb->stream_len[1] = 24;
703 fcb->stream_len[2] = 3;
704 fcb->stream_len[3] = 12;
706 fcb->window_size = 4;
707 fcb->compute_feat = feat_s2_4x_cep2feat;
709 else if ((strcmp(type, "s3_1x39") == 0) || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) {
710 /* 1-stream cep/dcep/pow/ddcep (Hack!! hardwired constants below) */
712 E_ERROR("s2_4x features require cepsize == 13\n");
718 fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32));
719 fcb->stream_len[0] = 39;
721 fcb->window_size = 3;
722 fcb->compute_feat = feat_s3_1x39_cep2feat;
724 else if (strncmp(type, "1s_c_d_dd", 9) == 0) {
725 fcb->cepsize = cepsize;
727 fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32));
728 fcb->stream_len[0] = cepsize * 3;
729 fcb->out_dim = cepsize * 3;
730 fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */
731 fcb->compute_feat = feat_1s_c_d_dd_cep2feat;
733 else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) {
734 fcb->cepsize = cepsize;
736 fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32));
737 fcb->stream_len[0] = cepsize * 4;
738 fcb->out_dim = cepsize * 4;
739 fcb->window_size = FEAT_DCEP_WIN * 2;
740 fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat;
742 else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) {
743 /* 1-stream cep/dcep */
744 fcb->cepsize = cepsize;
746 fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32));
747 fcb->stream_len[0] = feat_cepsize(fcb) * 2;
748 fcb->out_dim = fcb->stream_len[0];
749 fcb->window_size = 2;
750 fcb->compute_feat = feat_s3_cep_dcep;
752 else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) {
754 fcb->cepsize = cepsize;
756 fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32));
757 fcb->stream_len[0] = feat_cepsize(fcb);
758 fcb->out_dim = fcb->stream_len[0];
759 fcb->window_size = 0;
760 fcb->compute_feat = feat_s3_cep;
762 else if (strncmp(type, "1s_3c", 5) == 0 || strncmp(type, "1s_4c", 5) == 0) {
763 /* 1-stream cep with frames concatenated, so called cepwin features */
764 if (strncmp(type, "1s_3c", 5) == 0)
765 fcb->window_size = 3;
767 fcb->window_size = 4;
769 fcb->cepsize = cepsize;
771 fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32));
772 fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1);
773 fcb->out_dim = fcb->stream_len[0];
774 fcb->compute_feat = feat_copy;
779 char *mtype = ckd_salloc(type);
780 char *wd = ckd_salloc(type);
782 * Generic definition: Format should be %d,%d,%d,...,%d (i.e.,
783 * comma separated list of feature stream widths; #items =
784 * #streams). An optional window size (frames will be
785 * concatenated) is also allowed, which can be specified with
786 * a colon after the list of feature streams.
790 for (i = 1; i < l - 1; i++) {
791 if (mtype[i] == ',') {
795 else if (mtype[i] == ':') {
797 fcb->window_size = atoi(mtype + i + 1);
801 k++; /* Presumably there are (#commas+1) streams */
803 fcb->stream_len = (int32 *) ckd_calloc(k, sizeof(int32));
805 /* Scan individual feature stream lengths */
810 while (sscanf(strp, "%s%n", wd, &l) == 1) {
812 if ((i >= fcb->n_stream)
813 || (sscanf(wd, "%d", &(fcb->stream_len[i])) != 1)
814 || (fcb->stream_len[i] <= 0))
815 E_FATAL("Bad feature type argument\n");
816 /* Input size before windowing */
817 fcb->cepsize += fcb->stream_len[i];
818 if (fcb->window_size > 0)
819 fcb->stream_len[i] *= (fcb->window_size * 2 + 1);
820 /* Output size after windowing */
821 fcb->out_dim += fcb->stream_len[i];
824 if (i != fcb->n_stream)
825 E_FATAL("Bad feature type argument\n");
826 if (fcb->cepsize != cepsize)
827 E_FATAL("Bad feature type argument\n");
829 /* Input is already the feature stream */
830 fcb->compute_feat = feat_copy;
836 fcb->cmn_struct = cmn_init(feat_cepsize(fcb));
838 fcb->varnorm = varnorm;
839 if (agc != AGC_NONE) {
840 fcb->agc_struct = agc_init();
842 * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things
843 * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY
846 /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */
847 agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0);
851 * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt()
853 fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE,
856 /* This one is actually just an array of pointers to "flatten out"
858 fcb->tmpcepbuf = ckd_calloc(2 * feat_window_size(fcb) + 1,
859 sizeof(*fcb->tmpcepbuf));
866 feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp)
870 for (i = 0; i < nfr; i++) {
871 fprintf(fp, "%8d:\n", i);
873 for (j = 0; j < feat_dimension1(fcb); j++) {
874 fprintf(fp, "\t%2d:", j);
876 for (k = 0; k < feat_dimension2(fcb, j); k++)
877 fprintf(fp, " %8.4f", MFCC2FLOAT(feat[i][j][k]));
886 feat_cmn(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
888 cmn_type_t cmn_type = fcb->cmn;
890 if (!(beginutt && endutt)
891 && cmn_type != CMN_NONE) /* Only cmn_prior in block computation mode. */
892 cmn_type = CMN_PRIOR;
896 cmn(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
899 cmn_prior(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
901 cmn_prior_update(fcb->cmn_struct);
906 cep_dump_dbg(fcb, mfc, nfr, "After CMN");
910 feat_agc(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
912 agc_type_t agc_type = fcb->agc;
914 if (!(beginutt && endutt)
915 && agc_type != AGC_NONE) /* Only agc_emax in block computation mode. */
920 agc_max(fcb->agc_struct, mfc, nfr);
923 agc_emax(fcb->agc_struct, mfc, nfr);
925 agc_emax_update(fcb->agc_struct);
928 agc_noise(fcb->agc_struct, mfc, nfr);
933 cep_dump_dbg(fcb, mfc, nfr, "After AGC");
937 feat_compute_utt(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 win, mfcc_t ***feat)
941 cep_dump_dbg(fcb, mfc, nfr, "Incoming features (after padding)");
943 /* Create feature vectors */
944 for (i = win; i < nfr - win; i++) {
945 fcb->compute_feat(fcb, mfc + i, feat[i - win]);
948 feat_print_dbg(fcb, feat, nfr - win * 2, "After dynamic feature computation");
951 feat_lda_transform(fcb, feat, nfr - win * 2);
952 feat_print_dbg(fcb, feat, nfr - win * 2, "After LDA");
956 feat_subvec_project(fcb, feat, nfr - win * 2);
957 feat_print_dbg(fcb, feat, nfr - win * 2, "After subvector projection");
963 * Read Sphinx-II format mfc file (s2mfc = Sphinx-II format MFC data).
964 * If out_mfc is NULL, no actual reading will be done, and the number of
965 * frames (plus padding) that would be read is returned.
967 * It's important that normalization is done before padding because
968 * frames outside the data we are interested in shouldn't be taken
969 * into normalization stats.
971 * @return # frames read (plus padding) if successful, -1 if
972 * error (e.g., mfc array too small).
975 feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win,
986 int32 start_pad, end_pad;
989 /* Initialize the output pointer to NULL, so that any attempts to
990 free() it if we fail before allocating it will not segfault! */
993 E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef);
994 if (ef >= 0 && ef <= sf) {
995 E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf);
999 /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */
1000 if ((stat_retry(file, &statbuf) < 0)
1001 || ((fp = fopen(file, "rb")) == NULL)) {
1002 E_ERROR("Failed to open file '%s' for reading: %s\n", file, strerror(errno));
1006 /* Read #floats in header */
1007 if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) {
1008 E_ERROR("%s: fread(#floats) failed\n", file);
1013 /* Check if n_float32 matches file size */
1015 if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */
1019 if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) { /* RAH, typecast both sides to remove compile warning */
1021 ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n",
1022 file, n_float32, n_float32, statbuf.st_size,
1031 if (n_float32 <= 0) {
1032 E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32);
1037 /* Convert n to #frames of input */
1038 n = n_float32 / cepsize;
1039 if (n * cepsize != n_float32) {
1040 E_ERROR("Header size field: %d; not multiple of %d\n", n_float32,
1046 /* Check start and end frames */
1049 E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file,
1058 E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n",
1063 /* Add window to start and end frames */
1073 end_pad = ef - n + 1;
1079 /* Limit n if indicated by [sf..ef] */
1080 if ((ef - sf + 1) < n)
1082 if (maxfr > 0 && n + start_pad + end_pad > maxfr) {
1083 E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n",
1084 file, maxfr, n + start_pad + end_pad);
1089 /* If no output buffer was supplied, then skip the actual data reading. */
1090 if (out_mfc != NULL) {
1091 /* Position at desired start frame and read actual MFC data */
1092 mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t));
1094 fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR);
1095 n_float32 = n * cepsize;
1097 float_feat = ckd_calloc(n_float32, sizeof(float32));
1099 float_feat = mfc[start_pad];
1101 if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) {
1102 E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize);
1108 for (i = 0; i < n_float32; i++) {
1109 SWAP_FLOAT32(&float_feat[i]);
1113 for (i = 0; i < n_float32; ++i) {
1114 mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]);
1116 ckd_free(float_feat);
1120 feat_cmn(fcb, mfc + start_pad, n, 1, 1);
1121 feat_agc(fcb, mfc + start_pad, n, 1, 1);
1123 /* Replicate start and end frames if necessary. */
1124 for (i = 0; i < start_pad; ++i)
1125 memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t));
1126 for (i = 0; i < end_pad; ++i)
1127 memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1],
1128 cepsize * sizeof(mfcc_t));
1134 return n + start_pad + end_pad;
1140 feat_s2mfc2feat(feat_t * fcb, const char *file, const char *dir, const char *cepext,
1141 int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr)
1146 int32 file_length, cepext_length, path_length = 0;
1149 if (fcb->cepsize <= 0) {
1150 E_ERROR("Bad cepsize: %d\n", fcb->cepsize);
1158 * Create mfc filename, combining file, dir and extension if
1163 * First we decide about the path. If dir is defined, then use
1164 * it. Otherwise assume the filename already contains the path.
1170 * This is not true but some 3rd party apps
1171 * may parse the output explicitly checking for this line
1173 E_INFO("At directory . (current directory)\n");
1176 E_INFO("At directory %s\n", dir);
1178 * Do not forget the path separator!
1180 path_length += strlen(dir) + 1;
1184 * Include cepext, if it's not already part of the filename.
1186 file_length = strlen(file);
1187 cepext_length = strlen(cepext);
1188 if ((file_length > cepext_length)
1189 && (strcmp(file + file_length - cepext_length, cepext) == 0)) {
1195 * Do not forget the '\0'
1197 path_length += file_length + cepext_length + 1;
1198 path = (char*) ckd_calloc(path_length, sizeof(char));
1200 #ifdef HAVE_SNPRINTF
1202 * Paranoia is our best friend...
1204 while ((file_length = snprintf(path, path_length, "%s%s%s%s", dir, ps, file, cepext)) > path_length) {
1205 path_length = file_length;
1206 path = (char*) ckd_realloc(path, path_length * sizeof(char));
1209 sprintf(path, "%s%s%s%s", dir, ps, file, cepext);
1212 win = feat_window_size(fcb);
1213 /* Pad maxfr with win, so we read enough raw feature data to
1214 * calculate the requisite number of dynamic features. */
1219 /* Read mfc file including window or padding if necessary. */
1220 nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, &mfc, maxfr, fcb->cepsize);
1223 ckd_free_2d((void **) mfc);
1227 /* Actually compute the features */
1228 feat_compute_utt(fcb, mfc, nfr, win, feat);
1230 ckd_free_2d((void **) mfc);
1233 /* Just calculate the number of frames we would need. */
1234 nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, NULL, maxfr, fcb->cepsize);
1241 return (nfr - win * 2);
1245 feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep,
1246 int32 nfr, mfcc_t *** ofeat)
1249 int32 i, win, cepsize;
1251 win = feat_window_size(fcb);
1252 cepsize = feat_cepsize(fcb);
1254 /* Copy and pad out the utterance (this requires that the
1255 * feature computation functions always access the buffer via
1256 * the frame pointers, which they do) */
1257 cepbuf = ckd_calloc(nfr + win * 2, sizeof(mfcc_t *));
1258 memcpy(cepbuf + win, uttcep, nfr * sizeof(mfcc_t *));
1260 /* Do normalization before we interpolate on the boundary */
1261 feat_cmn(fcb, cepbuf + win, nfr, 1, 1);
1262 feat_agc(fcb, cepbuf + win, nfr, 1, 1);
1264 /* Now interpolate */
1265 for (i = 0; i < win; ++i) {
1266 cepbuf[i] = fcb->cepbuf[i];
1267 memcpy(cepbuf[i], uttcep[0], cepsize * sizeof(mfcc_t));
1268 cepbuf[nfr + win + i] = fcb->cepbuf[win + i];
1269 memcpy(cepbuf[nfr + win + i], uttcep[nfr - 1], cepsize * sizeof(mfcc_t));
1271 /* Compute as usual. */
1272 feat_compute_utt(fcb, cepbuf, nfr + win * 2, win, ofeat);
1278 feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep,
1279 int32 beginutt, int32 endutt, mfcc_t *** ofeat)
1281 int32 win, cepsize, nbufcep;
1282 int32 i, j, nfeatvec;
1285 /* Avoid having to check this everywhere. */
1286 if (inout_ncep == NULL) inout_ncep = &zero;
1288 /* Special case for entire utterances. */
1289 if (beginutt && endutt && *inout_ncep > 0)
1290 return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat);
1292 win = feat_window_size(fcb);
1293 cepsize = feat_cepsize(fcb);
1295 /* Empty the input buffer on start of utterance. */
1297 fcb->bufpos = fcb->curpos;
1299 /* Calculate how much data is in the buffer already. */
1300 nbufcep = fcb->bufpos - fcb->curpos;
1302 nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos;
1303 /* Add any data that we have to replicate. */
1304 if (beginutt && *inout_ncep > 0)
1309 /* Only consume as much input as will fit in the buffer. */
1310 if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) {
1311 /* We also can't overwrite the trailing window, hence the
1312 * reason why win is subtracted here. */
1313 *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win;
1314 /* Cancel end of utterance processing. */
1318 /* FIXME: Don't modify the input! */
1319 feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt);
1320 feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt);
1322 /* Replicate first frame into the first win frames if we're at the
1323 * beginning of the utterance and there was some actual input to
1324 * deal with. (FIXME: Not entirely sure why that condition) */
1325 if (beginutt && *inout_ncep > 0) {
1326 for (i = 0; i < win; i++) {
1327 memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0],
1328 cepsize * sizeof(mfcc_t));
1329 fcb->bufpos %= LIVEBUFBLOCKSIZE;
1331 /* Move the current pointer past this data. */
1332 fcb->curpos = fcb->bufpos;
1336 /* Copy in frame data to the circular buffer. */
1337 for (i = 0; i < *inout_ncep; ++i) {
1338 memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i],
1339 cepsize * sizeof(mfcc_t));
1340 fcb->bufpos %= LIVEBUFBLOCKSIZE;
1344 /* Replicate last frame into the last win frames if we're at the
1345 * end of the utterance (even if there was no input, so we can
1346 * flush the output). */
1348 int32 tpos; /* Index of last input frame. */
1349 if (fcb->bufpos == 0)
1350 tpos = LIVEBUFBLOCKSIZE - 1;
1352 tpos = fcb->bufpos - 1;
1353 for (i = 0; i < win; ++i) {
1354 memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos],
1355 cepsize * sizeof(mfcc_t));
1356 fcb->bufpos %= LIVEBUFBLOCKSIZE;
1360 /* We have to leave the trailing window of frames. */
1361 nfeatvec = nbufcep - win;
1363 return 0; /* Do nothing. */
1365 for (i = 0; i < nfeatvec; ++i) {
1366 /* Handle wraparound cases. */
1367 if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) {
1368 /* Use tmpcepbuf for this case. Actually, we just need the pointers. */
1369 for (j = -win; j <= win; ++j) {
1371 (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE;
1372 fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos];
1374 fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]);
1377 fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]);
1379 /* Move the read pointer forward. */
1381 fcb->curpos %= LIVEBUFBLOCKSIZE;
1385 feat_lda_transform(fcb, ofeat, nfeatvec);
1388 feat_subvec_project(fcb, ofeat, nfeatvec);
1394 feat_retain(feat_t *f)
1401 feat_free(feat_t * f)
1405 if (--f->refcount > 0)
1409 ckd_free_2d((void **) f->cepbuf);
1410 ckd_free(f->tmpcepbuf);
1413 ckd_free((void *) f->name);
1416 ckd_free_3d((void ***) f->lda);
1418 ckd_free(f->stream_len);
1419 ckd_free(f->sv_len);
1420 ckd_free(f->sv_buf);
1421 subvecs_free(f->subvecs);
1423 cmn_free(f->cmn_struct);
1424 agc_free(f->agc_struct);
1432 feat_report(feat_t * f)
1435 E_INFO_NOFN("Initialization of feat_t, report:\n");
1436 E_INFO_NOFN("Feature type = %s\n", f->name);
1437 E_INFO_NOFN("Cepstral size = %d\n", f->cepsize);
1438 E_INFO_NOFN("Number of streams = %d\n", f->n_stream);
1439 for (i = 0; i < f->n_stream; i++) {
1440 E_INFO_NOFN("Vector size of stream[%d]: %d\n", i,
1443 E_INFO_NOFN("Number of subvectors = %d\n", f->n_sv);
1444 for (i = 0; i < f->n_sv; i++) {
1447 E_INFO_NOFN("Components of subvector[%d]:", i);
1448 for (sv = f->subvecs[i]; sv && *sv != -1; ++sv)
1449 E_INFOCONT(" %d", *sv);
1452 E_INFO_NOFN("Whether CMN is used = %d\n", f->cmn);
1453 E_INFO_NOFN("Whether AGC is used = %d\n", f->agc);
1454 E_INFO_NOFN("Whether variance is normalized = %d\n", f->varnorm);