1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
18 /****************************************************************************************
19 Portions of this file are derived from the following 3GPP standard:
22 ANSI-C code for the Adaptive Multi-Rate (AMR) speech codec
23 Available from http://www.3gpp.org
25 (C) 2004, 3GPP Organizational Partners (ARIB, ATIS, CCSA, ETSI, TTA, TTC)
26 Permission to distribute, modify and use this file under the standard license
27 terms listed above has been obtained from the copyright holder.
28 ****************************************************************************************/
30 ------------------------------------------------------------------------------
37 ------------------------------------------------------------------------------
40 Background noise source characteristic detector (SCD)
42 ------------------------------------------------------------------------------
46 /*----------------------------------------------------------------------------
48 ----------------------------------------------------------------------------*/
57 /*----------------------------------------------------------------------------
59 ; Define module specific macros here
60 ----------------------------------------------------------------------------*/
63 /*----------------------------------------------------------------------------
65 ; Include all pre-processor statements here. Include conditional
66 ; compile variables also.
67 ----------------------------------------------------------------------------*/
71 /*----------------------------------------------------------------------------
72 ; LOCAL FUNCTION DEFINITIONS
73 ; Function Prototype declaration
74 ----------------------------------------------------------------------------*/
76 /*----------------------------------------------------------------------------
77 ; LOCAL VARIABLE DEFINITIONS
78 ; Variable declaration - defined here and used outside this module
79 ----------------------------------------------------------------------------*/
83 ------------------------------------------------------------------------------
84 FUNCTION NAME: Bgn_scd_reset
85 ------------------------------------------------------------------------------
86 INPUT AND OUTPUT DEFINITIONS
89 state = points to memory of type Bgn_scdState.
92 The memory of type Bgn_scdState pointed to by state is set to all
96 Returns 0 if memory was successfully initialized,
99 Global Variables Used:
102 Local Variables Needed:
105 ------------------------------------------------------------------------------
110 ------------------------------------------------------------------------------
115 ------------------------------------------------------------------------------
118 bgnscd.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
120 ------------------------------------------------------------------------------
123 Word16 Bgn_scd_reset (Bgn_scdState *state)
125 if (state == (Bgn_scdState *) NULL){
126 fprintf(stderr, "Bgn_scd_reset: invalid parameter\n");
130 // Static vectors to zero
131 Set_zero (state->frameEnergyHist, L_ENERGYHIST);
133 // Initialize hangover handling
134 state->bgHangover = 0;
139 ------------------------------------------------------------------------------
141 [State any special notes, constraints or cautions for users of this function]
143 ------------------------------------------------------------------------------
146 Word16 Bgn_scd_reset(Bgn_scdState *state)
148 if (state == (Bgn_scdState *) NULL)
150 /* fprintf(stderr, "Bgn_scd_reset: invalid parameter\n"); */
154 /* Static vectors to zero */
155 oscl_memset(state->frameEnergyHist, 0, L_ENERGYHIST*sizeof(Word16));
157 /* Initialize hangover handling */
158 state->bgHangover = 0;
163 /****************************************************************************/
166 ------------------------------------------------------------------------------
167 FUNCTION NAME: Bgn_scd
168 ------------------------------------------------------------------------------
169 INPUT AND OUTPUT DEFINITIONS
172 st = pointer to state variables of type Bgn_scdState
173 ltpGainHist[] = LTP gain history (Word16)
174 speech[] = synthesis speech frame (Word16)
175 voicedHangover = pointer to # of frames after last voiced frame (Word16)
176 pOverflow = pointer to overflow indicator (Flag)
179 st = function updates the state variables of type Bgn_scdState
181 voicedHangover = function updates the # of frames after last voiced
182 frame pointed to by voicedHangover.
183 pOverflow = 1 if the basic math function L_add() results in saturation.
184 else pOverflow is zero.
187 inbgNoise = flag if background noise is present (Word16)
189 Global Variables Used:
192 Local Variables Needed:
195 ------------------------------------------------------------------------------
198 Characterize synthesis speech and detect background noise.
200 ------------------------------------------------------------------------------
205 ------------------------------------------------------------------------------
208 bgnscd.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
210 ------------------------------------------------------------------------------
213 Word16 Bgn_scd (Bgn_scdState *st, // i : State variables for bgn SCD
214 Word16 ltpGainHist[], // i : LTP gain history
215 Word16 speech[], // o : synthesis speech frame
216 Word16 *voicedHangover // o : # of frames after last
221 Word16 prevVoiced, inbgNoise;
223 Word16 ltpLimit, frameEnergyMin;
224 Word16 currEnergy, noiseFloor, maxEnergy, maxEnergyLastPart;
227 // Update the inBackgroundNoise flag (valid for use in next frame if BFI)
228 // it now works as an energy detector floating on top
229 // not as good as a VAD.
234 for (i = 0; i < L_FRAME; i++)
236 s = L_mac (s, speech[i], speech[i]);
241 currEnergy = extract_h (s);
243 frameEnergyMin = 32767;
245 for (i = 0; i < L_ENERGYHIST; i++)
247 if (sub(st->frameEnergyHist[i], frameEnergyMin) < 0)
248 frameEnergyMin = st->frameEnergyHist[i];
251 noiseFloor = shl (frameEnergyMin, 4); // Frame Energy Margin of 16
253 maxEnergy = st->frameEnergyHist[0];
254 for (i = 1; i < L_ENERGYHIST-4; i++)
256 if ( sub (maxEnergy, st->frameEnergyHist[i]) < 0)
258 maxEnergy = st->frameEnergyHist[i];
262 maxEnergyLastPart = st->frameEnergyHist[2*L_ENERGYHIST/3];
263 for (i = 2*L_ENERGYHIST/3+1; i < L_ENERGYHIST; i++)
265 if ( sub (maxEnergyLastPart, st->frameEnergyHist[i] ) < 0)
267 maxEnergyLastPart = st->frameEnergyHist[i];
271 inbgNoise = 0; // false
273 // Do not consider silence as noise
274 // Do not consider continuous high volume as noise
275 // Or if the current noise level is very low
276 // Mark as noise if under current noise limit
277 // OR if the maximum energy is below the upper limit
279 if ( (sub(maxEnergy, LOWERNOISELIMIT) > 0) &&
280 (sub(currEnergy, FRAMEENERGYLIMIT) < 0) &&
281 (sub(currEnergy, LOWERNOISELIMIT) > 0) &&
282 ( (sub(currEnergy, noiseFloor) < 0) ||
283 (sub(maxEnergyLastPart, UPPERNOISELIMIT) < 0)))
285 if (sub(add(st->bgHangover, 1), 30) > 0)
290 st->bgHangover = add(st->bgHangover, 1);
298 // make final decision about frame state, act somewhat cautiously
299 if (sub(st->bgHangover,1) > 0)
300 inbgNoise = 1; // true
302 for (i = 0; i < L_ENERGYHIST-1; i++)
304 st->frameEnergyHist[i] = st->frameEnergyHist[i+1];
306 st->frameEnergyHist[L_ENERGYHIST-1] = currEnergy;
308 // prepare for voicing decision; tighten the threshold after some
310 ltpLimit = 13926; // 0.85 Q14
311 if (sub(st->bgHangover, 8) > 0)
313 ltpLimit = 15565; // 0.95 Q14
315 if (sub(st->bgHangover, 15) > 0)
317 ltpLimit = 16383; // 1.00 Q14
320 // weak sort of voicing indication.
321 prevVoiced = 0; // false
323 if (sub(gmed_n(&ltpGainHist[4], 5), ltpLimit) > 0)
325 prevVoiced = 1; // true
327 if (sub(st->bgHangover, 20) > 0) {
328 if (sub(gmed_n(ltpGainHist, 9), ltpLimit) > 0)
330 prevVoiced = 1; // true
334 prevVoiced = 0; // false
344 temp = add(*voicedHangover, 1);
345 if (sub(temp, 10) > 0)
347 *voicedHangover = 10;
351 *voicedHangover = temp;
358 ------------------------------------------------------------------------------
360 [State any special notes, constraints or cautions for users of this function]
362 ------------------------------------------------------------------------------
365 Word16 Bgn_scd(Bgn_scdState *st, /* i : State variables for bgn SCD */
366 Word16 ltpGainHist[], /* i : LTP gain history */
367 Word16 speech[], /* o : synthesis speech frame */
368 Word16 *voicedHangover,/* o : # of frames after last
374 Word16 prevVoiced, inbgNoise;
376 Word16 ltpLimit, frameEnergyMin;
377 Word16 currEnergy, noiseFloor, maxEnergy, maxEnergyLastPart;
381 /* Update the inBackgroundNoise flag (valid for use in next frame if BFI) */
382 /* it now works as an energy detector floating on top */
383 /* not as good as a VAD. */
387 for (i = L_FRAME - 1; i >= 0; i--)
389 L_temp = ((Word32) speech[i]) * speech[i];
390 if (L_temp != (Word32) 0x40000000L)
392 L_temp = L_temp << 1;
398 s = L_add(s, L_temp, pOverflow);
401 /* s is a sum of squares, so don't need to check for neg overflow */
402 if (s > (Word32)0x1fffffffL)
408 currEnergy = (Word16)(s >> 14);
411 frameEnergyMin = 32767;
412 for (i = L_ENERGYHIST - 1; i >= 0; i--)
414 if (st->frameEnergyHist[i] < frameEnergyMin)
416 frameEnergyMin = st->frameEnergyHist[i];
420 /* Frame Energy Margin of 16 */
421 L_temp = (Word32)frameEnergyMin << 4;
422 if (L_temp != (Word32)((Word16) L_temp))
435 noiseFloor = (Word16)(L_temp);
438 maxEnergy = st->frameEnergyHist[0];
439 for (i = L_ENERGYHIST - 5; i >= 1; i--)
441 if (maxEnergy < st->frameEnergyHist[i])
443 maxEnergy = st->frameEnergyHist[i];
447 maxEnergyLastPart = st->frameEnergyHist[2*L_ENERGYHIST/3];
448 for (i = 2 * L_ENERGYHIST / 3 + 1; i < L_ENERGYHIST; i++)
450 if (maxEnergyLastPart < st->frameEnergyHist[i])
452 maxEnergyLastPart = st->frameEnergyHist[i];
456 /* Do not consider silence as noise */
457 /* Do not consider continuous high volume as noise */
458 /* Or if the current noise level is very low */
459 /* Mark as noise if under current noise limit */
460 /* OR if the maximum energy is below the upper limit */
462 if ((maxEnergy > LOWERNOISELIMIT) &&
463 (currEnergy < FRAMEENERGYLIMIT) &&
464 (currEnergy > LOWERNOISELIMIT) &&
465 ((currEnergy < noiseFloor) ||
466 (maxEnergyLastPart < UPPERNOISELIMIT)))
468 if ((st->bgHangover + 1) > 30)
482 /* make final decision about frame state, act somewhat cautiously */
484 if (st->bgHangover > 1)
493 for (i = 0; i < L_ENERGYHIST - 1; i++)
495 st->frameEnergyHist[i] = st->frameEnergyHist[i+1];
497 st->frameEnergyHist[L_ENERGYHIST-1] = currEnergy;
499 /* prepare for voicing decision; tighten the threshold after some
502 if (st->bgHangover > 15)
504 ltpLimit = 16383; /* 1.00 Q14 */
506 else if (st->bgHangover > 8)
508 ltpLimit = 15565; /* 0.95 Q14 */
512 ltpLimit = 13926; /* 0.85 Q14 */
515 /* weak sort of voicing indication. */
518 if (gmed_n(<pGainHist[4], 5) > ltpLimit)
523 if (st->bgHangover > 20)
525 if (gmed_n(ltpGainHist, 9) > ltpLimit)
542 temp = *voicedHangover + 1;
546 *voicedHangover = 10;
550 *voicedHangover = temp;