1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 * ====================================================================
38 * agc.c -- Various forms of automatic gain control (AGC)
40 * **********************************************
41 * CMU ARPA Speech Project
43 * Copyright (c) 1996 Carnegie Mellon University.
44 * ALL RIGHTS RESERVED.
45 * **********************************************
49 * Revision 1.5 2005/06/21 19:25:41 arthchan2003
50 * 1, Fixed doxygen documentation. 2, Added $ keyword.
52 * Revision 1.3 2005/03/30 01:22:46 archan
53 * Fixed mistakes in last updates. Add
56 * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
65 #include "sphinxbase/err.h"
66 #include "sphinxbase/ckd_alloc.h"
67 #include "sphinxbase/agc.h"
69 /* NOTE! These must match the enum in agc.h */
70 const char *agc_type_str[] = {
76 static const int n_agc_type_str = sizeof(agc_type_str)/sizeof(agc_type_str[0]);
79 agc_type_from_str(const char *str) /* Look up an AGC type by name: str is compared against each entry of agc_type_str. */
83 for (i = 0; i < n_agc_type_str; ++i) { /* linear scan over every known type name */
84 if (0 == strcmp(str, agc_type_str[i])) /* exact, case-sensitive comparison */
87 E_FATAL("Unknown AGC type '%s'\n", str); /* presumably reached only when no name matched; the match branch is not visible in this excerpt */
94 agc = ckd_calloc(1, sizeof(*agc));
95 agc->noise_thresh = FLOAT2MFCC(2.0);
100 void agc_free(agc_t *agc)
106 * Normalize c0 for all frames such that max(c0) = 0.
109 agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame) /* Shift c0 (mfc[i][0]) of every frame so that the largest c0 in the utterance becomes 0; the maximum is recorded in agc->obs_max. */
115 agc->obs_max = mfc[0][0]; /* seed the running maximum with frame 0, so the scan below can start at 1 */
116 for (i = 1; i < n_frame; i++) {
117 if (mfc[i][0] > agc->obs_max) {
118 agc->obs_max = mfc[i][0];
123 E_INFO("AGCMax: obs=max= %.2f\n", MFCC2FLOAT(agc->obs_max)); /* obs_max is an mfcc_t: convert it before handing it to a %f conversion */
124 for (i = 0; i < n_frame; i++) /* second pass covers every frame, including frame 0 */
125 mfc[i][0] -= agc->obs_max;
129 agc_emax_set(agc_t *agc, float32 m) /* Set agc->max, the value agc_emax subtracts from c0, from a float. */
131 agc->max = FLOAT2MFCC(m); /* stored in mfcc_t representation */
132 E_INFO("AGCEMax: max= %.2f\n", m); /* logs the caller's float value, not the converted one */
136 agc_emax_get(agc_t *agc) /* Return agc->max converted back to float. */
138 return MFCC2FLOAT(agc->max); /* inverse of the FLOAT2MFCC applied in agc_emax_set */
142 agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame) /* Subtract the current estimate agc->max from c0 (mfc[i][0]) of every frame, tracking the largest c0 seen in agc->obs_max. */
148 for (i = 0; i < n_frame; ++i) { /* start at frame 0: beginning at 1 left the first frame untracked and un-normalized */
149 if (mfc[i][0] > agc->obs_max) { /* compare the raw c0, before the subtraction below */
150 agc->obs_max = mfc[i][0];
153 mfc[i][0] -= agc->max;
157 /* Update estimated max for next utterance */
159 agc_emax_update(agc_t *agc) /* Fold the maximum observed in the last utterance into the running estimate agc->max, then reset agc->obs_max. */
161 if (agc->obs_frame) { /* Update only if some data observed */
162 agc->obs_max_sum += agc->obs_max;
165 /* Re-estimate max over past history; decay the history */
166 agc->max = agc->obs_max_sum / agc->obs_utt; /* mean of the accumulated per-utterance maxima */
167 if (agc->obs_utt == 8) {
168 agc->obs_max_sum /= 2; /* halve the history so older utterances weigh less */
172 E_INFO("AGCEMax: obs= %.2f, new= %.2f\n", MFCC2FLOAT(agc->obs_max), MFCC2FLOAT(agc->max)); /* both values are mfcc_t: convert them before handing them to %f conversions */
174 /* Reset the accumulators for the next utterance. */
176 agc->obs_max = FLOAT2MFCC(-1000.0); /* Less than any real C0 value (hopefully!!) */
180 agc_noise(agc_t *agc, /* Noise-based AGC: subtract the average c0 of the lowest-energy frames from c0 (cep[i][0]) of every frame. */
184 mfcc_t min_energy; /* Minimum log-energy */
185 mfcc_t noise_level; /* Average noise_level */
186 int32 i; /* frame index */
187 int32 noise_frames; /* Number of noise frames */
189 /* Determine minimum log-energy in utterance */
190 min_energy = cep[0][0]; /* NOTE(review): reads frame 0 unconditionally; confirm callers never pass nfr == 0 */
191 for (i = 0; i < nfr; ++i) {
192 if (cep[i][0] < min_energy)
193 min_energy = cep[i][0];
196 /* Average all frames between min_energy and min_energy + agc->noise_thresh */
199 min_energy += agc->noise_thresh; /* min_energy now holds the upper bound of the noise band */
200 for (i = 0; i < nfr; ++i) {
201 if (cep[i][0] < min_energy) { /* strictly below the bound counts as a noise frame */
202 noise_level += cep[i][0];
206 noise_level /= noise_frames; /* NOTE(review): the counting of noise_frames is not visible in this excerpt; confirm it cannot be 0 here */
208 E_INFO("AGC NOISE: max= %6.3f\n", MFCC2FLOAT(noise_level)); /* the value logged is the average noise level */
210 /* Subtract noise_level from all log_energy values */
211 for (i = 0; i < nfr; ++i)
212 cep[i][0] -= noise_level;
216 agc_set_threshold(agc_t *agc, float32 threshold) /* Set the margin above the minimum c0 that agc_noise uses to select noise frames. */
218 agc->noise_thresh = FLOAT2MFCC(threshold); /* kept in mfcc_t representation */
222 agc_get_threshold(agc_t *agc) /* Return the noise threshold as a float, i.e. the value last given to agc_set_threshold. */
224 return MFCC2FLOAT(agc->noise_thresh); /* noise_thresh is already an mfcc_t; convert it back instead of applying FLOAT2MFCC a second time */