- add third_party src.
[platform/framework/web/crosswalk.git] / src / third_party / webrtc / modules / audio_processing / aecm / aecm_core.c
1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11 #include "webrtc/modules/audio_processing/aecm/aecm_core.h"
12
13 #include <assert.h>
14 #include <stddef.h>
15 #include <stdlib.h>
16
17 #include "webrtc/common_audio/signal_processing/include/real_fft.h"
18 #include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
19 #include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
20 #include "webrtc/modules/audio_processing/utility/ring_buffer.h"
21 #include "webrtc/system_wrappers/interface/compile_assert_c.h"
22 #include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
23 #include "webrtc/typedefs.h"
24
25 #ifdef AEC_DEBUG
26 FILE *dfile;
27 FILE *testfile;
28 #endif
29
30 // Square root of Hanning window in Q14.
31 #if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
32 // Table is defined in an ARM assembly file.
33 extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END;
34 #else
35 static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
36   0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
37   3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
38   6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
39   9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
40   11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
41   13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
42   15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
43   16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
44 };
45 #endif
46
47 #ifdef AECM_WITH_ABS_APPROX
48 //Q15 alpha = 0.99439986968132  const Factor for magnitude approximation
49 static const uint16_t kAlpha1 = 32584;
50 //Q15 beta = 0.12967166976970   const Factor for magnitude approximation
51 static const uint16_t kBeta1 = 4249;
52 //Q15 alpha = 0.94234827210087  const Factor for magnitude approximation
53 static const uint16_t kAlpha2 = 30879;
54 //Q15 beta = 0.33787806009150   const Factor for magnitude approximation
55 static const uint16_t kBeta2 = 11072;
56 //Q15 alpha = 0.82247698684306  const Factor for magnitude approximation
57 static const uint16_t kAlpha3 = 26951;
58 //Q15 beta = 0.57762063060713   const Factor for magnitude approximation
59 static const uint16_t kBeta3 = 18927;
60 #endif
61
62 // Initialization table for echo channel in 8 kHz
63 static const int16_t kChannelStored8kHz[PART_LEN1] = {
64     2040,   1815,   1590,   1498,   1405,   1395,   1385,   1418,
65     1451,   1506,   1562,   1644,   1726,   1804,   1882,   1918,
66     1953,   1982,   2010,   2025,   2040,   2034,   2027,   2021,
67     2014,   1997,   1980,   1925,   1869,   1800,   1732,   1683,
68     1635,   1604,   1572,   1545,   1517,   1481,   1444,   1405,
69     1367,   1331,   1294,   1270,   1245,   1239,   1233,   1247,
70     1260,   1282,   1303,   1338,   1373,   1407,   1441,   1470,
71     1499,   1524,   1549,   1565,   1582,   1601,   1621,   1649,
72     1676
73 };
74
75 // Initialization table for echo channel in 16 kHz
76 static const int16_t kChannelStored16kHz[PART_LEN1] = {
77     2040,   1590,   1405,   1385,   1451,   1562,   1726,   1882,
78     1953,   2010,   2040,   2027,   2014,   1980,   1869,   1732,
79     1635,   1572,   1517,   1444,   1367,   1294,   1245,   1233,
80     1260,   1303,   1373,   1441,   1499,   1549,   1582,   1621,
81     1676,   1741,   1802,   1861,   1921,   1983,   2040,   2102,
82     2170,   2265,   2375,   2515,   2651,   2781,   2922,   3075,
83     3253,   3471,   3738,   3976,   4151,   4258,   4308,   4288,
84     4270,   4253,   4237,   4179,   4086,   3947,   3757,   3484,
85     3153
86 };
87
88 static const int16_t kCosTable[] = {
89     8192,  8190,  8187,  8180,  8172,  8160,  8147,  8130,  8112,
90     8091,  8067,  8041,  8012,  7982,  7948,  7912,  7874,  7834,
91     7791,  7745,  7697,  7647,  7595,  7540,  7483,  7424,  7362,
92     7299,  7233,  7164,  7094,  7021,  6947,  6870,  6791,  6710,
93     6627,  6542,  6455,  6366,  6275,  6182,  6087,  5991,  5892,
94     5792,  5690,  5586,  5481,  5374,  5265,  5155,  5043,  4930,
95     4815,  4698,  4580,  4461,  4341,  4219,  4096,  3971,  3845,
96     3719,  3591,  3462,  3331,  3200,  3068,  2935,  2801,  2667,
97     2531,  2395,  2258,  2120,  1981,  1842,  1703,  1563,  1422,
98     1281,  1140,   998,   856,   713,   571,   428,   285,   142,
99        0,  -142,  -285,  -428,  -571,  -713,  -856,  -998, -1140,
100    -1281, -1422, -1563, -1703, -1842, -1981, -2120, -2258, -2395,
101    -2531, -2667, -2801, -2935, -3068, -3200, -3331, -3462, -3591,
102    -3719, -3845, -3971, -4095, -4219, -4341, -4461, -4580, -4698,
103    -4815, -4930, -5043, -5155, -5265, -5374, -5481, -5586, -5690,
104    -5792, -5892, -5991, -6087, -6182, -6275, -6366, -6455, -6542,
105    -6627, -6710, -6791, -6870, -6947, -7021, -7094, -7164, -7233,
106    -7299, -7362, -7424, -7483, -7540, -7595, -7647, -7697, -7745,
107    -7791, -7834, -7874, -7912, -7948, -7982, -8012, -8041, -8067,
108    -8091, -8112, -8130, -8147, -8160, -8172, -8180, -8187, -8190,
109    -8191, -8190, -8187, -8180, -8172, -8160, -8147, -8130, -8112,
110    -8091, -8067, -8041, -8012, -7982, -7948, -7912, -7874, -7834,
111    -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, -7362,
112    -7299, -7233, -7164, -7094, -7021, -6947, -6870, -6791, -6710,
113    -6627, -6542, -6455, -6366, -6275, -6182, -6087, -5991, -5892,
114    -5792, -5690, -5586, -5481, -5374, -5265, -5155, -5043, -4930,
115    -4815, -4698, -4580, -4461, -4341, -4219, -4096, -3971, -3845,
116    -3719, -3591, -3462, -3331, -3200, -3068, -2935, -2801, -2667,
117    -2531, -2395, -2258, -2120, -1981, -1842, -1703, -1563, -1422,
118    -1281, -1140,  -998,  -856,  -713,  -571,  -428,  -285,  -142,
119        0,   142,   285,   428,   571,   713,   856,   998,  1140,
120     1281,  1422,  1563,  1703,  1842,  1981,  2120,  2258,  2395,
121     2531,  2667,  2801,  2935,  3068,  3200,  3331,  3462,  3591,
122     3719,  3845,  3971,  4095,  4219,  4341,  4461,  4580,  4698,
123     4815,  4930,  5043,  5155,  5265,  5374,  5481,  5586,  5690,
124     5792,  5892,  5991,  6087,  6182,  6275,  6366,  6455,  6542,
125     6627,  6710,  6791,  6870,  6947,  7021,  7094,  7164,  7233,
126     7299,  7362,  7424,  7483,  7540,  7595,  7647,  7697,  7745,
127     7791,  7834,  7874,  7912,  7948,  7982,  8012,  8041,  8067,
128     8091,  8112,  8130,  8147,  8160,  8172,  8180,  8187,  8190
129 };
130
131 static const int16_t kSinTable[] = {
132        0,    142,    285,    428,    571,    713,    856,    998,
133     1140,   1281,   1422,   1563,   1703,   1842,   1981,   2120,
134     2258,   2395,   2531,   2667,   2801,   2935,   3068,   3200,
135     3331,   3462,   3591,   3719,   3845,   3971,   4095,   4219,
136     4341,   4461,   4580,   4698,   4815,   4930,   5043,   5155,
137     5265,   5374,   5481,   5586,   5690,   5792,   5892,   5991,
138     6087,   6182,   6275,   6366,   6455,   6542,   6627,   6710,
139     6791,   6870,   6947,   7021,   7094,   7164,   7233,   7299,
140     7362,   7424,   7483,   7540,   7595,   7647,   7697,   7745,
141     7791,   7834,   7874,   7912,   7948,   7982,   8012,   8041,
142     8067,   8091,   8112,   8130,   8147,   8160,   8172,   8180,
143     8187,   8190,   8191,   8190,   8187,   8180,   8172,   8160,
144     8147,   8130,   8112,   8091,   8067,   8041,   8012,   7982,
145     7948,   7912,   7874,   7834,   7791,   7745,   7697,   7647,
146     7595,   7540,   7483,   7424,   7362,   7299,   7233,   7164,
147     7094,   7021,   6947,   6870,   6791,   6710,   6627,   6542,
148     6455,   6366,   6275,   6182,   6087,   5991,   5892,   5792,
149     5690,   5586,   5481,   5374,   5265,   5155,   5043,   4930,
150     4815,   4698,   4580,   4461,   4341,   4219,   4096,   3971,
151     3845,   3719,   3591,   3462,   3331,   3200,   3068,   2935,
152     2801,   2667,   2531,   2395,   2258,   2120,   1981,   1842,
153     1703,   1563,   1422,   1281,   1140,    998,    856,    713,
154      571,    428,    285,    142,      0,   -142,   -285,   -428,
155     -571,   -713,   -856,   -998,  -1140,  -1281,  -1422,  -1563,
156    -1703,  -1842,  -1981,  -2120,  -2258,  -2395,  -2531,  -2667,
157    -2801,  -2935,  -3068,  -3200,  -3331,  -3462,  -3591,  -3719,
158    -3845,  -3971,  -4095,  -4219,  -4341,  -4461,  -4580,  -4698,
159    -4815,  -4930,  -5043,  -5155,  -5265,  -5374,  -5481,  -5586,
160    -5690,  -5792,  -5892,  -5991,  -6087,  -6182,  -6275,  -6366,
161    -6455,  -6542,  -6627,  -6710,  -6791,  -6870,  -6947,  -7021,
162    -7094,  -7164,  -7233,  -7299,  -7362,  -7424,  -7483,  -7540,
163    -7595,  -7647,  -7697,  -7745,  -7791,  -7834,  -7874,  -7912,
164    -7948,  -7982,  -8012,  -8041,  -8067,  -8091,  -8112,  -8130,
165    -8147,  -8160,  -8172,  -8180,  -8187,  -8190,  -8191,  -8190,
166    -8187,  -8180,  -8172,  -8160,  -8147,  -8130,  -8112,  -8091,
167    -8067,  -8041,  -8012,  -7982,  -7948,  -7912,  -7874,  -7834,
168    -7791,  -7745,  -7697,  -7647,  -7595,  -7540,  -7483,  -7424,
169    -7362,  -7299,  -7233,  -7164,  -7094,  -7021,  -6947,  -6870,
170    -6791,  -6710,  -6627,  -6542,  -6455,  -6366,  -6275,  -6182,
171    -6087,  -5991,  -5892,  -5792,  -5690,  -5586,  -5481,  -5374,
172    -5265,  -5155,  -5043,  -4930,  -4815,  -4698,  -4580,  -4461,
173    -4341,  -4219,  -4096,  -3971,  -3845,  -3719,  -3591,  -3462,
174    -3331,  -3200,  -3068,  -2935,  -2801,  -2667,  -2531,  -2395,
175    -2258,  -2120,  -1981,  -1842,  -1703,  -1563,  -1422,  -1281,
176    -1140,   -998,   -856,   -713,   -571,   -428,   -285,   -142
177 };
178
179 static const int16_t kNoiseEstQDomain = 15;
180 static const int16_t kNoiseEstIncCount = 5;
181
182 static void ComfortNoise(AecmCore_t* aecm,
183                          const uint16_t* dfa,
184                          complex16_t* out,
185                          const int16_t* lambda);
186
187 static int16_t CalcSuppressionGain(AecmCore_t * const aecm);
188
189 // Moves the pointer to the next entry and inserts |far_spectrum| and
190 // corresponding Q-domain in its buffer.
191 //
192 // Inputs:
193 //      - self          : Pointer to the delay estimation instance
194 //      - far_spectrum  : Pointer to the far end spectrum
195 //      - far_q         : Q-domain of far end spectrum
196 //
197 static void UpdateFarHistory(AecmCore_t* self,
198                              uint16_t* far_spectrum,
199                              int far_q) {
200   // Get new buffer position
201   self->far_history_pos++;
202   if (self->far_history_pos >= MAX_DELAY) {
203     self->far_history_pos = 0;
204   }
205   // Update Q-domain buffer
206   self->far_q_domains[self->far_history_pos] = far_q;
207   // Update far end spectrum buffer
208   memcpy(&(self->far_history[self->far_history_pos * PART_LEN1]),
209          far_spectrum,
210          sizeof(uint16_t) * PART_LEN1);
211 }
212
213 // Returns a pointer to the far end spectrum aligned to current near end
214 // spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been
215 // called before AlignedFarend(...). Otherwise, you get the pointer to the
216 // previous frame. The memory is only valid until the next call of
217 // WebRtc_DelayEstimatorProcessFix(...).
218 //
219 // Inputs:
220 //      - self              : Pointer to the AECM instance.
221 //      - delay             : Current delay estimate.
222 //
223 // Output:
224 //      - far_q             : The Q-domain of the aligned far end spectrum
225 //
226 // Return value:
227 //      - far_spectrum      : Pointer to the aligned far end spectrum
228 //                            NULL - Error
229 //
230 static const uint16_t* AlignedFarend(AecmCore_t* self, int* far_q, int delay) {
231   int buffer_position = 0;
232   assert(self != NULL);
233   buffer_position = self->far_history_pos - delay;
234
235   // Check buffer position
236   if (buffer_position < 0) {
237     buffer_position += MAX_DELAY;
238   }
239   // Get Q-domain
240   *far_q = self->far_q_domains[buffer_position];
241   // Return far end spectrum
242   return &(self->far_history[buffer_position * PART_LEN1]);
243 }
244
245 // Declare function pointers.
246 CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
247 StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
248 ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
249
250 int WebRtcAecm_CreateCore(AecmCore_t **aecmInst)
251 {
252     AecmCore_t *aecm = malloc(sizeof(AecmCore_t));
253     *aecmInst = aecm;
254     if (aecm == NULL)
255     {
256         return -1;
257     }
258
259     aecm->farFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
260                                             sizeof(int16_t));
261     if (!aecm->farFrameBuf)
262     {
263         WebRtcAecm_FreeCore(aecm);
264         aecm = NULL;
265         return -1;
266     }
267
268     aecm->nearNoisyFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
269                                                   sizeof(int16_t));
270     if (!aecm->nearNoisyFrameBuf)
271     {
272         WebRtcAecm_FreeCore(aecm);
273         aecm = NULL;
274         return -1;
275     }
276
277     aecm->nearCleanFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
278                                                   sizeof(int16_t));
279     if (!aecm->nearCleanFrameBuf)
280     {
281         WebRtcAecm_FreeCore(aecm);
282         aecm = NULL;
283         return -1;
284     }
285
286     aecm->outFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
287                                             sizeof(int16_t));
288     if (!aecm->outFrameBuf)
289     {
290         WebRtcAecm_FreeCore(aecm);
291         aecm = NULL;
292         return -1;
293     }
294
295     aecm->delay_estimator_farend = WebRtc_CreateDelayEstimatorFarend(PART_LEN1,
296                                                                      MAX_DELAY);
297     if (aecm->delay_estimator_farend == NULL) {
298       WebRtcAecm_FreeCore(aecm);
299       aecm = NULL;
300       return -1;
301     }
302     aecm->delay_estimator =
303         WebRtc_CreateDelayEstimator(aecm->delay_estimator_farend, 0);
304     if (aecm->delay_estimator == NULL) {
305       WebRtcAecm_FreeCore(aecm);
306       aecm = NULL;
307       return -1;
308     }
309
310     aecm->real_fft = WebRtcSpl_CreateRealFFT(PART_LEN_SHIFT);
311     if (aecm->real_fft == NULL) {
312       WebRtcAecm_FreeCore(aecm);
313       aecm = NULL;
314       return -1;
315     }
316
317     // Init some aecm pointers. 16 and 32 byte alignment is only necessary
318     // for Neon code currently.
319     aecm->xBuf = (int16_t*) (((uintptr_t)aecm->xBuf_buf + 31) & ~ 31);
320     aecm->dBufClean = (int16_t*) (((uintptr_t)aecm->dBufClean_buf + 31) & ~ 31);
321     aecm->dBufNoisy = (int16_t*) (((uintptr_t)aecm->dBufNoisy_buf + 31) & ~ 31);
322     aecm->outBuf = (int16_t*) (((uintptr_t)aecm->outBuf_buf + 15) & ~ 15);
323     aecm->channelStored = (int16_t*) (((uintptr_t)
324                                              aecm->channelStored_buf + 15) & ~ 15);
325     aecm->channelAdapt16 = (int16_t*) (((uintptr_t)
326                                               aecm->channelAdapt16_buf + 15) & ~ 15);
327     aecm->channelAdapt32 = (int32_t*) (((uintptr_t)
328                                               aecm->channelAdapt32_buf + 31) & ~ 31);
329
330     return 0;
331 }
332
333 void WebRtcAecm_InitEchoPathCore(AecmCore_t* aecm, const int16_t* echo_path)
334 {
335     int i = 0;
336
337     // Reset the stored channel
338     memcpy(aecm->channelStored, echo_path, sizeof(int16_t) * PART_LEN1);
339     // Reset the adapted channels
340     memcpy(aecm->channelAdapt16, echo_path, sizeof(int16_t) * PART_LEN1);
341     for (i = 0; i < PART_LEN1; i++)
342     {
343         aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32(
344             (int32_t)(aecm->channelAdapt16[i]), 16);
345     }
346
347     // Reset channel storing variables
348     aecm->mseAdaptOld = 1000;
349     aecm->mseStoredOld = 1000;
350     aecm->mseThreshold = WEBRTC_SPL_WORD32_MAX;
351     aecm->mseChannelCount = 0;
352 }
353
354 static void WindowAndFFT(AecmCore_t* aecm,
355                           int16_t* fft,
356                           const int16_t* time_signal,
357                           complex16_t* freq_signal,
358                           int time_signal_scaling) {
359   int i = 0;
360
361   // FFT of signal
362   for (i = 0; i < PART_LEN; i++) {
363     // Window time domain signal and insert into real part of
364     // transformation array |fft|
365     fft[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
366         (time_signal[i] << time_signal_scaling),
367         WebRtcAecm_kSqrtHanning[i],
368         14);
369     fft[PART_LEN + i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
370         (time_signal[i + PART_LEN] << time_signal_scaling),
371         WebRtcAecm_kSqrtHanning[PART_LEN - i],
372         14);
373   }
374
375   // Do forward FFT, then take only the first PART_LEN complex samples,
376   // and change signs of the imaginary parts.
377   WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
378   for (i = 0; i < PART_LEN; i++) {
379     freq_signal[i].imag = -freq_signal[i].imag;
380   }
381 }
382
383 static void InverseFFTAndWindow(AecmCore_t* aecm,
384                                 int16_t* fft,
385                                 complex16_t* efw,
386                                 int16_t* output,
387                                 const int16_t* nearendClean)
388 {
389     int i, j, outCFFT;
390     int32_t tmp32no1;
391     // Reuse |efw| for the inverse FFT output after transferring
392     // the contents to |fft|.
393     int16_t* ifft_out = (int16_t*)efw;
394
395     // Synthesis
396     for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) {
397       fft[j] = efw[i].real;
398       fft[j + 1] = -efw[i].imag;
399     }
400     fft[0] = efw[0].real;
401     fft[1] = -efw[0].imag;
402
403     fft[PART_LEN2] = efw[PART_LEN].real;
404     fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
405
406     // Inverse FFT. Keep outCFFT to scale the samples in the next block.
407     outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out);
408     for (i = 0; i < PART_LEN; i++) {
409       ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
410           ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14);
411       tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i],
412                                       outCFFT - aecm->dfaCleanQDomain);
413       output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
414           tmp32no1 + aecm->outBuf[i], WEBRTC_SPL_WORD16_MIN);
415
416       tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(ifft_out[PART_LEN + i],
417           WebRtcAecm_kSqrtHanning[PART_LEN - i], 14);
418       tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
419           outCFFT - aecm->dfaCleanQDomain);
420       aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(
421           WEBRTC_SPL_WORD16_MAX, tmp32no1, WEBRTC_SPL_WORD16_MIN);
422     }
423
424     // Copy the current block to the old position (aecm->outBuf is shifted elsewhere)
425     memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
426     memcpy(aecm->dBufNoisy, aecm->dBufNoisy + PART_LEN, sizeof(int16_t) * PART_LEN);
427     if (nearendClean != NULL)
428     {
429         memcpy(aecm->dBufClean, aecm->dBufClean + PART_LEN, sizeof(int16_t) * PART_LEN);
430     }
431 }
432
433 static void CalcLinearEnergiesC(AecmCore_t* aecm,
434                                 const uint16_t* far_spectrum,
435                                 int32_t* echo_est,
436                                 uint32_t* far_energy,
437                                 uint32_t* echo_energy_adapt,
438                                 uint32_t* echo_energy_stored)
439 {
440     int i;
441
442     // Get energy for the delayed far end signal and estimated
443     // echo using both stored and adapted channels.
444     for (i = 0; i < PART_LEN1; i++)
445     {
446         echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
447                                            far_spectrum[i]);
448         (*far_energy) += (uint32_t)(far_spectrum[i]);
449         (*echo_energy_adapt) += WEBRTC_SPL_UMUL_16_16(aecm->channelAdapt16[i],
450                                           far_spectrum[i]);
451         (*echo_energy_stored) += (uint32_t)echo_est[i];
452     }
453 }
454
455 static void StoreAdaptiveChannelC(AecmCore_t* aecm,
456                                   const uint16_t* far_spectrum,
457                                   int32_t* echo_est)
458 {
459     int i;
460
461     // During startup we store the channel every block.
462     memcpy(aecm->channelStored, aecm->channelAdapt16, sizeof(int16_t) * PART_LEN1);
463     // Recalculate echo estimate
464     for (i = 0; i < PART_LEN; i += 4)
465     {
466         echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
467                                            far_spectrum[i]);
468         echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1],
469                                            far_spectrum[i + 1]);
470         echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2],
471                                            far_spectrum[i + 2]);
472         echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3],
473                                            far_spectrum[i + 3]);
474     }
475     echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
476                                        far_spectrum[i]);
477 }
478
479 static void ResetAdaptiveChannelC(AecmCore_t* aecm)
480 {
481     int i;
482
483     // The stored channel has a significantly lower MSE than the adaptive one for
484     // two consecutive calculations. Reset the adaptive channel.
485     memcpy(aecm->channelAdapt16, aecm->channelStored,
486            sizeof(int16_t) * PART_LEN1);
487     // Restore the W32 channel
488     for (i = 0; i < PART_LEN; i += 4)
489     {
490         aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32(
491                 (int32_t)aecm->channelStored[i], 16);
492         aecm->channelAdapt32[i + 1] = WEBRTC_SPL_LSHIFT_W32(
493                 (int32_t)aecm->channelStored[i + 1], 16);
494         aecm->channelAdapt32[i + 2] = WEBRTC_SPL_LSHIFT_W32(
495                 (int32_t)aecm->channelStored[i + 2], 16);
496         aecm->channelAdapt32[i + 3] = WEBRTC_SPL_LSHIFT_W32(
497                 (int32_t)aecm->channelStored[i + 3], 16);
498     }
499     aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((int32_t)aecm->channelStored[i], 16);
500 }
501
502 // Initialize function pointers for ARM Neon platform.
503 #if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
504 static void WebRtcAecm_InitNeon(void)
505 {
506   WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannelNeon;
507   WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannelNeon;
508   WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon;
509 }
510 #endif
511
512 // WebRtcAecm_InitCore(...)
513 //
514 // This function initializes the AECM instant created with WebRtcAecm_CreateCore(...)
515 // Input:
516 //      - aecm            : Pointer to the Echo Suppression instance
517 //      - samplingFreq   : Sampling Frequency
518 //
519 // Output:
520 //      - aecm            : Initialized instance
521 //
522 // Return value         :  0 - Ok
523 //                        -1 - Error
524 //
525 int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq)
526 {
527     int i = 0;
528     int32_t tmp32 = PART_LEN1 * PART_LEN1;
529     int16_t tmp16 = PART_LEN1;
530
531     if (samplingFreq != 8000 && samplingFreq != 16000)
532     {
533         samplingFreq = 8000;
534         return -1;
535     }
536     // sanity check of sampling frequency
537     aecm->mult = (int16_t)samplingFreq / 8000;
538
539     aecm->farBufWritePos = 0;
540     aecm->farBufReadPos = 0;
541     aecm->knownDelay = 0;
542     aecm->lastKnownDelay = 0;
543
544     WebRtc_InitBuffer(aecm->farFrameBuf);
545     WebRtc_InitBuffer(aecm->nearNoisyFrameBuf);
546     WebRtc_InitBuffer(aecm->nearCleanFrameBuf);
547     WebRtc_InitBuffer(aecm->outFrameBuf);
548
549     memset(aecm->xBuf_buf, 0, sizeof(aecm->xBuf_buf));
550     memset(aecm->dBufClean_buf, 0, sizeof(aecm->dBufClean_buf));
551     memset(aecm->dBufNoisy_buf, 0, sizeof(aecm->dBufNoisy_buf));
552     memset(aecm->outBuf_buf, 0, sizeof(aecm->outBuf_buf));
553
554     aecm->seed = 666;
555     aecm->totCount = 0;
556
557     if (WebRtc_InitDelayEstimatorFarend(aecm->delay_estimator_farend) != 0) {
558       return -1;
559     }
560     if (WebRtc_InitDelayEstimator(aecm->delay_estimator) != 0) {
561       return -1;
562     }
563     // Set far end histories to zero
564     memset(aecm->far_history, 0, sizeof(uint16_t) * PART_LEN1 * MAX_DELAY);
565     memset(aecm->far_q_domains, 0, sizeof(int) * MAX_DELAY);
566     aecm->far_history_pos = MAX_DELAY;
567
568     aecm->nlpFlag = 1;
569     aecm->fixedDelay = -1;
570
571     aecm->dfaCleanQDomain = 0;
572     aecm->dfaCleanQDomainOld = 0;
573     aecm->dfaNoisyQDomain = 0;
574     aecm->dfaNoisyQDomainOld = 0;
575
576     memset(aecm->nearLogEnergy, 0, sizeof(aecm->nearLogEnergy));
577     aecm->farLogEnergy = 0;
578     memset(aecm->echoAdaptLogEnergy, 0, sizeof(aecm->echoAdaptLogEnergy));
579     memset(aecm->echoStoredLogEnergy, 0, sizeof(aecm->echoStoredLogEnergy));
580
581     // Initialize the echo channels with a stored shape.
582     if (samplingFreq == 8000)
583     {
584         WebRtcAecm_InitEchoPathCore(aecm, kChannelStored8kHz);
585     }
586     else
587     {
588         WebRtcAecm_InitEchoPathCore(aecm, kChannelStored16kHz);
589     }
590
591     memset(aecm->echoFilt, 0, sizeof(aecm->echoFilt));
592     memset(aecm->nearFilt, 0, sizeof(aecm->nearFilt));
593     aecm->noiseEstCtr = 0;
594
595     aecm->cngMode = AecmTrue;
596
597     memset(aecm->noiseEstTooLowCtr, 0, sizeof(aecm->noiseEstTooLowCtr));
598     memset(aecm->noiseEstTooHighCtr, 0, sizeof(aecm->noiseEstTooHighCtr));
599     // Shape the initial noise level to an approximate pink noise.
600     for (i = 0; i < (PART_LEN1 >> 1) - 1; i++)
601     {
602         aecm->noiseEst[i] = (tmp32 << 8);
603         tmp16--;
604         tmp32 -= (int32_t)((tmp16 << 1) + 1);
605     }
606     for (; i < PART_LEN1; i++)
607     {
608         aecm->noiseEst[i] = (tmp32 << 8);
609     }
610
611     aecm->farEnergyMin = WEBRTC_SPL_WORD16_MAX;
612     aecm->farEnergyMax = WEBRTC_SPL_WORD16_MIN;
613     aecm->farEnergyMaxMin = 0;
614     aecm->farEnergyVAD = FAR_ENERGY_MIN; // This prevents false speech detection at the
615                                          // beginning.
616     aecm->farEnergyMSE = 0;
617     aecm->currentVADValue = 0;
618     aecm->vadUpdateCount = 0;
619     aecm->firstVAD = 1;
620
621     aecm->startupState = 0;
622     aecm->supGain = SUPGAIN_DEFAULT;
623     aecm->supGainOld = SUPGAIN_DEFAULT;
624
625     aecm->supGainErrParamA = SUPGAIN_ERROR_PARAM_A;
626     aecm->supGainErrParamD = SUPGAIN_ERROR_PARAM_D;
627     aecm->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B;
628     aecm->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D;
629
630     // Assert a preprocessor definition at compile-time. It's an assumption
631     // used in assembly code, so check the assembly files before any change.
632     COMPILE_ASSERT(PART_LEN % 16 == 0);
633
634     // Initialize function pointers.
635     WebRtcAecm_CalcLinearEnergies = CalcLinearEnergiesC;
636     WebRtcAecm_StoreAdaptiveChannel = StoreAdaptiveChannelC;
637     WebRtcAecm_ResetAdaptiveChannel = ResetAdaptiveChannelC;
638
639 #ifdef WEBRTC_DETECT_ARM_NEON
640     uint64_t features = WebRtc_GetCPUFeaturesARM();
641     if ((features & kCPUFeatureNEON) != 0)
642     {
643       WebRtcAecm_InitNeon();
644     }
645 #elif defined(WEBRTC_ARCH_ARM_NEON)
646     WebRtcAecm_InitNeon();
647 #endif
648
649     return 0;
650 }
651
652 // TODO(bjornv): This function is currently not used. Add support for these
653 // parameters from a higher level
654 int WebRtcAecm_Control(AecmCore_t *aecm, int delay, int nlpFlag)
655 {
656     aecm->nlpFlag = nlpFlag;
657     aecm->fixedDelay = delay;
658
659     return 0;
660 }
661
662 int WebRtcAecm_FreeCore(AecmCore_t *aecm)
663 {
664     if (aecm == NULL)
665     {
666         return -1;
667     }
668
669     WebRtc_FreeBuffer(aecm->farFrameBuf);
670     WebRtc_FreeBuffer(aecm->nearNoisyFrameBuf);
671     WebRtc_FreeBuffer(aecm->nearCleanFrameBuf);
672     WebRtc_FreeBuffer(aecm->outFrameBuf);
673
674     WebRtc_FreeDelayEstimator(aecm->delay_estimator);
675     WebRtc_FreeDelayEstimatorFarend(aecm->delay_estimator_farend);
676     WebRtcSpl_FreeRealFFT(aecm->real_fft);
677
678     free(aecm);
679
680     return 0;
681 }
682
683 int WebRtcAecm_ProcessFrame(AecmCore_t * aecm,
684                             const int16_t * farend,
685                             const int16_t * nearendNoisy,
686                             const int16_t * nearendClean,
687                             int16_t * out)
688 {
689     int16_t outBlock_buf[PART_LEN + 8]; // Align buffer to 8-byte boundary.
690     int16_t* outBlock = (int16_t*) (((uintptr_t) outBlock_buf + 15) & ~ 15);
691
692     int16_t farFrame[FRAME_LEN];
693     const int16_t* out_ptr = NULL;
694     int size = 0;
695
696     // Buffer the current frame.
697     // Fetch an older one corresponding to the delay.
698     WebRtcAecm_BufferFarFrame(aecm, farend, FRAME_LEN);
699     WebRtcAecm_FetchFarFrame(aecm, farFrame, FRAME_LEN, aecm->knownDelay);
700
701     // Buffer the synchronized far and near frames,
702     // to pass the smaller blocks individually.
703     WebRtc_WriteBuffer(aecm->farFrameBuf, farFrame, FRAME_LEN);
704     WebRtc_WriteBuffer(aecm->nearNoisyFrameBuf, nearendNoisy, FRAME_LEN);
705     if (nearendClean != NULL)
706     {
707         WebRtc_WriteBuffer(aecm->nearCleanFrameBuf, nearendClean, FRAME_LEN);
708     }
709
710     // Process as many blocks as possible.
711     while (WebRtc_available_read(aecm->farFrameBuf) >= PART_LEN)
712     {
713         int16_t far_block[PART_LEN];
714         const int16_t* far_block_ptr = NULL;
715         int16_t near_noisy_block[PART_LEN];
716         const int16_t* near_noisy_block_ptr = NULL;
717
718         WebRtc_ReadBuffer(aecm->farFrameBuf, (void**) &far_block_ptr, far_block,
719                           PART_LEN);
720         WebRtc_ReadBuffer(aecm->nearNoisyFrameBuf,
721                           (void**) &near_noisy_block_ptr,
722                           near_noisy_block,
723                           PART_LEN);
724         if (nearendClean != NULL)
725         {
726             int16_t near_clean_block[PART_LEN];
727             const int16_t* near_clean_block_ptr = NULL;
728
729             WebRtc_ReadBuffer(aecm->nearCleanFrameBuf,
730                               (void**) &near_clean_block_ptr,
731                               near_clean_block,
732                               PART_LEN);
733             if (WebRtcAecm_ProcessBlock(aecm,
734                                         far_block_ptr,
735                                         near_noisy_block_ptr,
736                                         near_clean_block_ptr,
737                                         outBlock) == -1)
738             {
739                 return -1;
740             }
741         } else
742         {
743             if (WebRtcAecm_ProcessBlock(aecm,
744                                         far_block_ptr,
745                                         near_noisy_block_ptr,
746                                         NULL,
747                                         outBlock) == -1)
748             {
749                 return -1;
750             }
751         }
752
753         WebRtc_WriteBuffer(aecm->outFrameBuf, outBlock, PART_LEN);
754     }
755
756     // Stuff the out buffer if we have less than a frame to output.
757     // This should only happen for the first frame.
758     size = (int) WebRtc_available_read(aecm->outFrameBuf);
759     if (size < FRAME_LEN)
760     {
761         WebRtc_MoveReadPtr(aecm->outFrameBuf, size - FRAME_LEN);
762     }
763
764     // Obtain an output frame.
765     WebRtc_ReadBuffer(aecm->outFrameBuf, (void**) &out_ptr, out, FRAME_LEN);
766     if (out_ptr != out) {
767       // ReadBuffer() hasn't copied to |out| in this case.
768       memcpy(out, out_ptr, FRAME_LEN * sizeof(int16_t));
769     }
770
771     return 0;
772 }
773
774 // WebRtcAecm_AsymFilt(...)
775 //
776 // Performs asymmetric filtering.
777 //
778 // Inputs:
779 //      - filtOld       : Previous filtered value.
780 //      - inVal         : New input value.
781 //      - stepSizePos   : Step size when we have a positive contribution.
782 //      - stepSizeNeg   : Step size when we have a negative contribution.
783 //
784 // Output:
785 //
786 // Return: - Filtered value.
787 //
788 int16_t WebRtcAecm_AsymFilt(const int16_t filtOld, const int16_t inVal,
789                             const int16_t stepSizePos,
790                             const int16_t stepSizeNeg)
791 {
792     int16_t retVal;
793
794     if ((filtOld == WEBRTC_SPL_WORD16_MAX) | (filtOld == WEBRTC_SPL_WORD16_MIN))
795     {
796         return inVal;
797     }
798     retVal = filtOld;
799     if (filtOld > inVal)
800     {
801         retVal -= WEBRTC_SPL_RSHIFT_W16(filtOld - inVal, stepSizeNeg);
802     } else
803     {
804         retVal += WEBRTC_SPL_RSHIFT_W16(inVal - filtOld, stepSizePos);
805     }
806
807     return retVal;
808 }
809
810 // WebRtcAecm_CalcEnergies(...)
811 //
812 // This function calculates the log of energies for nearend, farend and estimated
813 // echoes. There is also an update of energy decision levels, i.e. internal VAD.
814 //
815 //
816 // @param  aecm         [i/o]   Handle of the AECM instance.
817 // @param  far_spectrum [in]    Pointer to farend spectrum.
818 // @param  far_q        [in]    Q-domain of farend spectrum.
819 // @param  nearEner     [in]    Near end energy for current block in
820 //                              Q(aecm->dfaQDomain).
821 // @param  echoEst      [out]   Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16).
822 //
823 void WebRtcAecm_CalcEnergies(AecmCore_t * aecm,
824                              const uint16_t* far_spectrum,
825                              const int16_t far_q,
826                              const uint32_t nearEner,
827                              int32_t * echoEst)
828 {
829     // Local variables
830     uint32_t tmpAdapt = 0;
831     uint32_t tmpStored = 0;
832     uint32_t tmpFar = 0;
833
834     int i;
835
836     int16_t zeros, frac;
837     int16_t tmp16;
838     int16_t increase_max_shifts = 4;
839     int16_t decrease_max_shifts = 11;
840     int16_t increase_min_shifts = 11;
841     int16_t decrease_min_shifts = 3;
842     int16_t kLogLowValue = WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7);
843
844     // Get log of near end energy and store in buffer
845
846     // Shift buffer
847     memmove(aecm->nearLogEnergy + 1, aecm->nearLogEnergy,
848             sizeof(int16_t) * (MAX_BUF_LEN - 1));
849
850     // Logarithm of integrated magnitude spectrum (nearEner)
851     tmp16 = kLogLowValue;
852     if (nearEner)
853     {
854         zeros = WebRtcSpl_NormU32(nearEner);
855         frac = (int16_t)WEBRTC_SPL_RSHIFT_U32(
856                               (WEBRTC_SPL_LSHIFT_U32(nearEner, zeros) & 0x7FFFFFFF),
857                               23);
858         // log2 in Q8
859         tmp16 += WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac;
860         tmp16 -= WEBRTC_SPL_LSHIFT_W16(aecm->dfaNoisyQDomain, 8);
861     }
862     aecm->nearLogEnergy[0] = tmp16;
863     // END: Get log of near end energy
864
865     WebRtcAecm_CalcLinearEnergies(aecm, far_spectrum, echoEst, &tmpFar, &tmpAdapt, &tmpStored);
866
867     // Shift buffers
868     memmove(aecm->echoAdaptLogEnergy + 1, aecm->echoAdaptLogEnergy,
869             sizeof(int16_t) * (MAX_BUF_LEN - 1));
870     memmove(aecm->echoStoredLogEnergy + 1, aecm->echoStoredLogEnergy,
871             sizeof(int16_t) * (MAX_BUF_LEN - 1));
872
873     // Logarithm of delayed far end energy
874     tmp16 = kLogLowValue;
875     if (tmpFar)
876     {
877         zeros = WebRtcSpl_NormU32(tmpFar);
878         frac = (int16_t)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpFar, zeros)
879                         & 0x7FFFFFFF), 23);
880         // log2 in Q8
881         tmp16 += WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac;
882         tmp16 -= WEBRTC_SPL_LSHIFT_W16(far_q, 8);
883     }
884     aecm->farLogEnergy = tmp16;
885
886     // Logarithm of estimated echo energy through adapted channel
887     tmp16 = kLogLowValue;
888     if (tmpAdapt)
889     {
890         zeros = WebRtcSpl_NormU32(tmpAdapt);
891         frac = (int16_t)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpAdapt, zeros)
892                         & 0x7FFFFFFF), 23);
893         //log2 in Q8
894         tmp16 += WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac;
895         tmp16 -= WEBRTC_SPL_LSHIFT_W16(RESOLUTION_CHANNEL16 + far_q, 8);
896     }
897     aecm->echoAdaptLogEnergy[0] = tmp16;
898
899     // Logarithm of estimated echo energy through stored channel
900     tmp16 = kLogLowValue;
901     if (tmpStored)
902     {
903         zeros = WebRtcSpl_NormU32(tmpStored);
904         frac = (int16_t)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpStored, zeros)
905                         & 0x7FFFFFFF), 23);
906         //log2 in Q8
907         tmp16 += WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac;
908         tmp16 -= WEBRTC_SPL_LSHIFT_W16(RESOLUTION_CHANNEL16 + far_q, 8);
909     }
910     aecm->echoStoredLogEnergy[0] = tmp16;
911
912     // Update farend energy levels (min, max, vad, mse)
913     if (aecm->farLogEnergy > FAR_ENERGY_MIN)
914     {
915         if (aecm->startupState == 0)
916         {
917             increase_max_shifts = 2;
918             decrease_min_shifts = 2;
919             increase_min_shifts = 8;
920         }
921
922         aecm->farEnergyMin = WebRtcAecm_AsymFilt(aecm->farEnergyMin, aecm->farLogEnergy,
923                                                  increase_min_shifts, decrease_min_shifts);
924         aecm->farEnergyMax = WebRtcAecm_AsymFilt(aecm->farEnergyMax, aecm->farLogEnergy,
925                                                  increase_max_shifts, decrease_max_shifts);
926         aecm->farEnergyMaxMin = (aecm->farEnergyMax - aecm->farEnergyMin);
927
928         // Dynamic VAD region size
929         tmp16 = 2560 - aecm->farEnergyMin;
930         if (tmp16 > 0)
931         {
932             tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, FAR_ENERGY_VAD_REGION, 9);
933         } else
934         {
935             tmp16 = 0;
936         }
937         tmp16 += FAR_ENERGY_VAD_REGION;
938
939         if ((aecm->startupState == 0) | (aecm->vadUpdateCount > 1024))
940         {
941             // In startup phase or VAD update halted
942             aecm->farEnergyVAD = aecm->farEnergyMin + tmp16;
943         } else
944         {
945             if (aecm->farEnergyVAD > aecm->farLogEnergy)
946             {
947                 aecm->farEnergyVAD += WEBRTC_SPL_RSHIFT_W16(aecm->farLogEnergy +
948                                                             tmp16 -
949                                                             aecm->farEnergyVAD,
950                                                             6);
951                 aecm->vadUpdateCount = 0;
952             } else
953             {
954                 aecm->vadUpdateCount++;
955             }
956         }
957         // Put MSE threshold higher than VAD
958         aecm->farEnergyMSE = aecm->farEnergyVAD + (1 << 8);
959     }
960
961     // Update VAD variables
962     if (aecm->farLogEnergy > aecm->farEnergyVAD)
963     {
964         if ((aecm->startupState == 0) | (aecm->farEnergyMaxMin > FAR_ENERGY_DIFF))
965         {
966             // We are in startup or have significant dynamics in input speech level
967             aecm->currentVADValue = 1;
968         }
969     } else
970     {
971         aecm->currentVADValue = 0;
972     }
973     if ((aecm->currentVADValue) && (aecm->firstVAD))
974     {
975         aecm->firstVAD = 0;
976         if (aecm->echoAdaptLogEnergy[0] > aecm->nearLogEnergy[0])
977         {
978             // The estimated echo has higher energy than the near end signal.
979             // This means that the initialization was too aggressive. Scale
980             // down by a factor 8
981             for (i = 0; i < PART_LEN1; i++)
982             {
983                 aecm->channelAdapt16[i] >>= 3;
984             }
985             // Compensate the adapted echo energy level accordingly.
986             aecm->echoAdaptLogEnergy[0] -= (3 << 8);
987             aecm->firstVAD = 1;
988         }
989     }
990 }
991
992 // WebRtcAecm_CalcStepSize(...)
993 //
994 // This function calculates the step size used in channel estimation
995 //
996 //
997 // @param  aecm  [in]    Handle of the AECM instance.
998 // @param  mu    [out]   (Return value) Stepsize in log2(), i.e. number of shifts.
999 //
1000 //
1001 int16_t WebRtcAecm_CalcStepSize(AecmCore_t * const aecm)
1002 {
1003
1004     int32_t tmp32;
1005     int16_t tmp16;
1006     int16_t mu = MU_MAX;
1007
1008     // Here we calculate the step size mu used in the
1009     // following NLMS based Channel estimation algorithm
1010     if (!aecm->currentVADValue)
1011     {
1012         // Far end energy level too low, no channel update
1013         mu = 0;
1014     } else if (aecm->startupState > 0)
1015     {
1016         if (aecm->farEnergyMin >= aecm->farEnergyMax)
1017         {
1018             mu = MU_MIN;
1019         } else
1020         {
1021             tmp16 = (aecm->farLogEnergy - aecm->farEnergyMin);
1022             tmp32 = WEBRTC_SPL_MUL_16_16(tmp16, MU_DIFF);
1023             tmp32 = WebRtcSpl_DivW32W16(tmp32, aecm->farEnergyMaxMin);
1024             mu = MU_MIN - 1 - (int16_t)(tmp32);
1025             // The -1 is an alternative to rounding. This way we get a larger
1026             // stepsize, so we in some sense compensate for truncation in NLMS
1027         }
1028         if (mu < MU_MAX)
1029         {
1030             mu = MU_MAX; // Equivalent with maximum step size of 2^-MU_MAX
1031         }
1032     }
1033
1034     return mu;
1035 }
1036
1037 // WebRtcAecm_UpdateChannel(...)
1038 //
1039 // This function performs channel estimation. NLMS and decision on channel storage.
1040 //
1041 //
1042 // @param  aecm         [i/o]   Handle of the AECM instance.
1043 // @param  far_spectrum [in]    Absolute value of the farend signal in Q(far_q)
1044 // @param  far_q        [in]    Q-domain of the farend signal
1045 // @param  dfa          [in]    Absolute value of the nearend signal (Q[aecm->dfaQDomain])
1046 // @param  mu           [in]    NLMS step size.
1047 // @param  echoEst      [i/o]   Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
1048 //
1049 void WebRtcAecm_UpdateChannel(AecmCore_t * aecm,
1050                               const uint16_t* far_spectrum,
1051                               const int16_t far_q,
1052                               const uint16_t * const dfa,
1053                               const int16_t mu,
1054                               int32_t * echoEst)
1055 {
1056
1057     uint32_t tmpU32no1, tmpU32no2;
1058     int32_t tmp32no1, tmp32no2;
1059     int32_t mseStored;
1060     int32_t mseAdapt;
1061
1062     int i;
1063
1064     int16_t zerosFar, zerosNum, zerosCh, zerosDfa;
1065     int16_t shiftChFar, shiftNum, shift2ResChan;
1066     int16_t tmp16no1;
1067     int16_t xfaQ, dfaQ;
1068
1069     // This is the channel estimation algorithm. It is base on NLMS but has a variable step
1070     // length, which was calculated above.
1071     if (mu)
1072     {
1073         for (i = 0; i < PART_LEN1; i++)
1074         {
1075             // Determine norm of channel and farend to make sure we don't get overflow in
1076             // multiplication
1077             zerosCh = WebRtcSpl_NormU32(aecm->channelAdapt32[i]);
1078             zerosFar = WebRtcSpl_NormU32((uint32_t)far_spectrum[i]);
1079             if (zerosCh + zerosFar > 31)
1080             {
1081                 // Multiplication is safe
1082                 tmpU32no1 = WEBRTC_SPL_UMUL_32_16(aecm->channelAdapt32[i],
1083                         far_spectrum[i]);
1084                 shiftChFar = 0;
1085             } else
1086             {
1087                 // We need to shift down before multiplication
1088                 shiftChFar = 32 - zerosCh - zerosFar;
1089                 tmpU32no1 = WEBRTC_SPL_UMUL_32_16(
1090                     WEBRTC_SPL_RSHIFT_W32(aecm->channelAdapt32[i], shiftChFar),
1091                     far_spectrum[i]);
1092             }
1093             // Determine Q-domain of numerator
1094             zerosNum = WebRtcSpl_NormU32(tmpU32no1);
1095             if (dfa[i])
1096             {
1097                 zerosDfa = WebRtcSpl_NormU32((uint32_t)dfa[i]);
1098             } else
1099             {
1100                 zerosDfa = 32;
1101             }
1102             tmp16no1 = zerosDfa - 2 + aecm->dfaNoisyQDomain -
1103                 RESOLUTION_CHANNEL32 - far_q + shiftChFar;
1104             if (zerosNum > tmp16no1 + 1)
1105             {
1106                 xfaQ = tmp16no1;
1107                 dfaQ = zerosDfa - 2;
1108             } else
1109             {
1110                 xfaQ = zerosNum - 2;
1111                 dfaQ = RESOLUTION_CHANNEL32 + far_q - aecm->dfaNoisyQDomain -
1112                     shiftChFar + xfaQ;
1113             }
1114             // Add in the same Q-domain
1115             tmpU32no1 = WEBRTC_SPL_SHIFT_W32(tmpU32no1, xfaQ);
1116             tmpU32no2 = WEBRTC_SPL_SHIFT_W32((uint32_t)dfa[i], dfaQ);
1117             tmp32no1 = (int32_t)tmpU32no2 - (int32_t)tmpU32no1;
1118             zerosNum = WebRtcSpl_NormW32(tmp32no1);
1119             if ((tmp32no1) && (far_spectrum[i] > (CHANNEL_VAD << far_q)))
1120             {
1121                 //
1122                 // Update is needed
1123                 //
1124                 // This is what we would like to compute
1125                 //
1126                 // tmp32no1 = dfa[i] - (aecm->channelAdapt[i] * far_spectrum[i])
1127                 // tmp32norm = (i + 1)
1128                 // aecm->channelAdapt[i] += (2^mu) * tmp32no1
1129                 //                        / (tmp32norm * far_spectrum[i])
1130                 //
1131
1132                 // Make sure we don't get overflow in multiplication.
1133                 if (zerosNum + zerosFar > 31)
1134                 {
1135                     if (tmp32no1 > 0)
1136                     {
1137                         tmp32no2 = (int32_t)WEBRTC_SPL_UMUL_32_16(tmp32no1,
1138                                                                         far_spectrum[i]);
1139                     } else
1140                     {
1141                         tmp32no2 = -(int32_t)WEBRTC_SPL_UMUL_32_16(-tmp32no1,
1142                                                                          far_spectrum[i]);
1143                     }
1144                     shiftNum = 0;
1145                 } else
1146                 {
1147                     shiftNum = 32 - (zerosNum + zerosFar);
1148                     if (tmp32no1 > 0)
1149                     {
1150                         tmp32no2 = (int32_t)WEBRTC_SPL_UMUL_32_16(
1151                                 WEBRTC_SPL_RSHIFT_W32(tmp32no1, shiftNum),
1152                                 far_spectrum[i]);
1153                     } else
1154                     {
1155                         tmp32no2 = -(int32_t)WEBRTC_SPL_UMUL_32_16(
1156                                 WEBRTC_SPL_RSHIFT_W32(-tmp32no1, shiftNum),
1157                                 far_spectrum[i]);
1158                     }
1159                 }
1160                 // Normalize with respect to frequency bin
1161                 tmp32no2 = WebRtcSpl_DivW32W16(tmp32no2, i + 1);
1162                 // Make sure we are in the right Q-domain
1163                 shift2ResChan = shiftNum + shiftChFar - xfaQ - mu - ((30 - zerosFar) << 1);
1164                 if (WebRtcSpl_NormW32(tmp32no2) < shift2ResChan)
1165                 {
1166                     tmp32no2 = WEBRTC_SPL_WORD32_MAX;
1167                 } else
1168                 {
1169                     tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, shift2ResChan);
1170                 }
1171                 aecm->channelAdapt32[i] = WEBRTC_SPL_ADD_SAT_W32(aecm->channelAdapt32[i],
1172                         tmp32no2);
1173                 if (aecm->channelAdapt32[i] < 0)
1174                 {
1175                     // We can never have negative channel gain
1176                     aecm->channelAdapt32[i] = 0;
1177                 }
1178                 aecm->channelAdapt16[i]
1179                         = (int16_t)WEBRTC_SPL_RSHIFT_W32(aecm->channelAdapt32[i], 16);
1180             }
1181         }
1182     }
1183     // END: Adaptive channel update
1184
1185     // Determine if we should store or restore the channel
1186     if ((aecm->startupState == 0) & (aecm->currentVADValue))
1187     {
1188         // During startup we store the channel every block,
1189         // and we recalculate echo estimate
1190         WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst);
1191     } else
1192     {
1193         if (aecm->farLogEnergy < aecm->farEnergyMSE)
1194         {
1195             aecm->mseChannelCount = 0;
1196         } else
1197         {
1198             aecm->mseChannelCount++;
1199         }
1200         // Enough data for validation. Store channel if we can.
1201         if (aecm->mseChannelCount >= (MIN_MSE_COUNT + 10))
1202         {
1203             // We have enough data.
1204             // Calculate MSE of "Adapt" and "Stored" versions.
1205             // It is actually not MSE, but average absolute error.
1206             mseStored = 0;
1207             mseAdapt = 0;
1208             for (i = 0; i < MIN_MSE_COUNT; i++)
1209             {
1210                 tmp32no1 = ((int32_t)aecm->echoStoredLogEnergy[i]
1211                         - (int32_t)aecm->nearLogEnergy[i]);
1212                 tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1);
1213                 mseStored += tmp32no2;
1214
1215                 tmp32no1 = ((int32_t)aecm->echoAdaptLogEnergy[i]
1216                         - (int32_t)aecm->nearLogEnergy[i]);
1217                 tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1);
1218                 mseAdapt += tmp32no2;
1219             }
1220             if (((mseStored << MSE_RESOLUTION) < (MIN_MSE_DIFF * mseAdapt))
1221                     & ((aecm->mseStoredOld << MSE_RESOLUTION) < (MIN_MSE_DIFF
1222                             * aecm->mseAdaptOld)))
1223             {
1224                 // The stored channel has a significantly lower MSE than the adaptive one for
1225                 // two consecutive calculations. Reset the adaptive channel.
1226                 WebRtcAecm_ResetAdaptiveChannel(aecm);
1227             } else if (((MIN_MSE_DIFF * mseStored) > (mseAdapt << MSE_RESOLUTION)) & (mseAdapt
1228                     < aecm->mseThreshold) & (aecm->mseAdaptOld < aecm->mseThreshold))
1229             {
1230                 // The adaptive channel has a significantly lower MSE than the stored one.
1231                 // The MSE for the adaptive channel has also been low for two consecutive
1232                 // calculations. Store the adaptive channel.
1233                 WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst);
1234
1235                 // Update threshold
1236                 if (aecm->mseThreshold == WEBRTC_SPL_WORD32_MAX)
1237                 {
1238                     aecm->mseThreshold = (mseAdapt + aecm->mseAdaptOld);
1239                 } else
1240                 {
1241                     aecm->mseThreshold += WEBRTC_SPL_MUL_16_16_RSFT(mseAdapt
1242                             - WEBRTC_SPL_MUL_16_16_RSFT(aecm->mseThreshold, 5, 3), 205, 8);
1243                 }
1244
1245             }
1246
1247             // Reset counter
1248             aecm->mseChannelCount = 0;
1249
1250             // Store the MSE values.
1251             aecm->mseStoredOld = mseStored;
1252             aecm->mseAdaptOld = mseAdapt;
1253         }
1254     }
1255     // END: Determine if we should store or reset channel estimate.
1256 }
1257
1258 // CalcSuppressionGain(...)
1259 //
1260 // This function calculates the suppression gain that is used in the Wiener filter.
1261 //
1262 //
1263 // @param  aecm     [i/n]   Handle of the AECM instance.
1264 // @param  supGain  [out]   (Return value) Suppression gain with which to scale the noise
1265 //                          level (Q14).
1266 //
1267 //
1268 static int16_t CalcSuppressionGain(AecmCore_t * const aecm)
1269 {
1270     int32_t tmp32no1;
1271
1272     int16_t supGain = SUPGAIN_DEFAULT;
1273     int16_t tmp16no1;
1274     int16_t dE = 0;
1275
1276     // Determine suppression gain used in the Wiener filter. The gain is based on a mix of far
1277     // end energy and echo estimation error.
1278     // Adjust for the far end signal level. A low signal level indicates no far end signal,
1279     // hence we set the suppression gain to 0
1280     if (!aecm->currentVADValue)
1281     {
1282         supGain = 0;
1283     } else
1284     {
1285         // Adjust for possible double talk. If we have large variations in estimation error we
1286         // likely have double talk (or poor channel).
1287         tmp16no1 = (aecm->nearLogEnergy[0] - aecm->echoStoredLogEnergy[0] - ENERGY_DEV_OFFSET);
1288         dE = WEBRTC_SPL_ABS_W16(tmp16no1);
1289
1290         if (dE < ENERGY_DEV_TOL)
1291         {
1292             // Likely no double talk. The better estimation, the more we can suppress signal.
1293             // Update counters
1294             if (dE < SUPGAIN_EPC_DT)
1295             {
1296                 tmp32no1 = WEBRTC_SPL_MUL_16_16(aecm->supGainErrParamDiffAB, dE);
1297                 tmp32no1 += (SUPGAIN_EPC_DT >> 1);
1298                 tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, SUPGAIN_EPC_DT);
1299                 supGain = aecm->supGainErrParamA - tmp16no1;
1300             } else
1301             {
1302                 tmp32no1 = WEBRTC_SPL_MUL_16_16(aecm->supGainErrParamDiffBD,
1303                                                 (ENERGY_DEV_TOL - dE));
1304                 tmp32no1 += ((ENERGY_DEV_TOL - SUPGAIN_EPC_DT) >> 1);
1305                 tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, (ENERGY_DEV_TOL
1306                         - SUPGAIN_EPC_DT));
1307                 supGain = aecm->supGainErrParamD + tmp16no1;
1308             }
1309         } else
1310         {
1311             // Likely in double talk. Use default value
1312             supGain = aecm->supGainErrParamD;
1313         }
1314     }
1315
1316     if (supGain > aecm->supGainOld)
1317     {
1318         tmp16no1 = supGain;
1319     } else
1320     {
1321         tmp16no1 = aecm->supGainOld;
1322     }
1323     aecm->supGainOld = supGain;
1324     if (tmp16no1 < aecm->supGain)
1325     {
1326         aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4);
1327     } else
1328     {
1329         aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4);
1330     }
1331
1332     // END: Update suppression gain
1333
1334     return aecm->supGain;
1335 }
1336
1337 // Transforms a time domain signal into the frequency domain, outputting the
1338 // complex valued signal, absolute value and sum of absolute values.
1339 //
1340 // time_signal          [in]    Pointer to time domain signal
1341 // freq_signal_real     [out]   Pointer to real part of frequency domain array
1342 // freq_signal_imag     [out]   Pointer to imaginary part of frequency domain
1343 //                              array
1344 // freq_signal_abs      [out]   Pointer to absolute value of frequency domain
1345 //                              array
1346 // freq_signal_sum_abs  [out]   Pointer to the sum of all absolute values in
1347 //                              the frequency domain array
1348 // return value                 The Q-domain of current frequency values
1349 //
1350 static int TimeToFrequencyDomain(AecmCore_t* aecm,
1351                                  const int16_t* time_signal,
1352                                  complex16_t* freq_signal,
1353                                  uint16_t* freq_signal_abs,
1354                                  uint32_t* freq_signal_sum_abs)
1355 {
1356     int i = 0;
1357     int time_signal_scaling = 0;
1358
1359     int32_t tmp32no1 = 0;
1360     int32_t tmp32no2 = 0;
1361
1362     // In fft_buf, +16 for 32-byte alignment.
1363     int16_t fft_buf[PART_LEN4 + 16];
1364     int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
1365
1366     int16_t tmp16no1;
1367 #ifndef WEBRTC_ARCH_ARM_V7
1368     int16_t tmp16no2;
1369 #endif
1370 #ifdef AECM_WITH_ABS_APPROX
1371     int16_t max_value = 0;
1372     int16_t min_value = 0;
1373     uint16_t alpha = 0;
1374     uint16_t beta = 0;
1375 #endif
1376
1377 #ifdef AECM_DYNAMIC_Q
1378     tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
1379     time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
1380 #endif
1381
1382     WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
1383
1384     // Extract imaginary and real part, calculate the magnitude for all frequency bins
1385     freq_signal[0].imag = 0;
1386     freq_signal[PART_LEN].imag = 0;
1387     freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(
1388         freq_signal[0].real);
1389     freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
1390         freq_signal[PART_LEN].real);
1391     (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
1392         (uint32_t)(freq_signal_abs[PART_LEN]);
1393
1394     for (i = 1; i < PART_LEN; i++)
1395     {
1396         if (freq_signal[i].real == 0)
1397         {
1398             freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
1399                 freq_signal[i].imag);
1400         }
1401         else if (freq_signal[i].imag == 0)
1402         {
1403             freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
1404                 freq_signal[i].real);
1405         }
1406         else
1407         {
1408             // Approximation for magnitude of complex fft output
1409             // magn = sqrt(real^2 + imag^2)
1410             // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
1411             //
1412             // The parameters alpha and beta are stored in Q15
1413
1414 #ifdef AECM_WITH_ABS_APPROX
1415             tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
1416             tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
1417
1418             if(tmp16no1 > tmp16no2)
1419             {
1420                 max_value = tmp16no1;
1421                 min_value = tmp16no2;
1422             } else
1423             {
1424                 max_value = tmp16no2;
1425                 min_value = tmp16no1;
1426             }
1427
1428             // Magnitude in Q(-6)
1429             if ((max_value >> 2) > min_value)
1430             {
1431                 alpha = kAlpha1;
1432                 beta = kBeta1;
1433             } else if ((max_value >> 1) > min_value)
1434             {
1435                 alpha = kAlpha2;
1436                 beta = kBeta2;
1437             } else
1438             {
1439                 alpha = kAlpha3;
1440                 beta = kBeta3;
1441             }
1442             tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(max_value,
1443                                                                 alpha,
1444                                                                 15);
1445             tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(min_value,
1446                                                                 beta,
1447                                                                 15);
1448             freq_signal_abs[i] = (uint16_t)tmp16no1 +
1449                 (uint16_t)tmp16no2;
1450 #else
1451 #ifdef WEBRTC_ARCH_ARM_V7
1452             __asm __volatile(
1453               "smulbb %[tmp32no1], %[real], %[real]\n\t"
1454               "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t"
1455               :[tmp32no1]"+r"(tmp32no1),
1456                [tmp32no2]"=r"(tmp32no2)
1457               :[real]"r"(freq_signal[i].real),
1458                [imag]"r"(freq_signal[i].imag)
1459             );
1460 #else
1461             tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
1462             tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
1463             tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1);
1464             tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2);
1465             tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2);
1466 #endif // WEBRTC_ARCH_ARM_V7
1467             tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
1468
1469             freq_signal_abs[i] = (uint16_t)tmp32no1;
1470 #endif // AECM_WITH_ABS_APPROX
1471         }
1472         (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
1473     }
1474
1475     return time_signal_scaling;
1476 }
1477
1478 int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
1479                             const int16_t * farend,
1480                             const int16_t * nearendNoisy,
1481                             const int16_t * nearendClean,
1482                             int16_t * output)
1483 {
1484     int i;
1485
1486     uint32_t xfaSum;
1487     uint32_t dfaNoisySum;
1488     uint32_t dfaCleanSum;
1489     uint32_t echoEst32Gained;
1490     uint32_t tmpU32;
1491
1492     int32_t tmp32no1;
1493
1494     uint16_t xfa[PART_LEN1];
1495     uint16_t dfaNoisy[PART_LEN1];
1496     uint16_t dfaClean[PART_LEN1];
1497     uint16_t* ptrDfaClean = dfaClean;
1498     const uint16_t* far_spectrum_ptr = NULL;
1499
1500     // 32 byte aligned buffers (with +8 or +16).
1501     // TODO (kma): define fft with complex16_t.
1502     int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
1503     int32_t echoEst32_buf[PART_LEN1 + 8];
1504     int32_t dfw_buf[PART_LEN2 + 8];
1505     int32_t efw_buf[PART_LEN2 + 8];
1506
1507     int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31);
1508     int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31);
1509     complex16_t* dfw = (complex16_t*) (((uintptr_t) dfw_buf + 31) & ~ 31);
1510     complex16_t* efw = (complex16_t*) (((uintptr_t) efw_buf + 31) & ~ 31);
1511
1512     int16_t hnl[PART_LEN1];
1513     int16_t numPosCoef = 0;
1514     int16_t nlpGain = ONE_Q14;
1515     int delay;
1516     int16_t tmp16no1;
1517     int16_t tmp16no2;
1518     int16_t mu;
1519     int16_t supGain;
1520     int16_t zeros32, zeros16;
1521     int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
1522     int far_q;
1523     int16_t resolutionDiff, qDomainDiff;
1524
1525     const int kMinPrefBand = 4;
1526     const int kMaxPrefBand = 24;
1527     int32_t avgHnl32 = 0;
1528
1529     // Determine startup state. There are three states:
1530     // (0) the first CONV_LEN blocks
1531     // (1) another CONV_LEN blocks
1532     // (2) the rest
1533
1534     if (aecm->startupState < 2)
1535     {
1536         aecm->startupState = (aecm->totCount >= CONV_LEN) + (aecm->totCount >= CONV_LEN2);
1537     }
1538     // END: Determine startup state
1539
1540     // Buffer near and far end signals
1541     memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
1542     memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
1543     if (nearendClean != NULL)
1544     {
1545         memcpy(aecm->dBufClean + PART_LEN, nearendClean, sizeof(int16_t) * PART_LEN);
1546     }
1547
1548     // Transform far end signal from time domain to frequency domain.
1549     far_q = TimeToFrequencyDomain(aecm,
1550                                   aecm->xBuf,
1551                                   dfw,
1552                                   xfa,
1553                                   &xfaSum);
1554
1555     // Transform noisy near end signal from time domain to frequency domain.
1556     zerosDBufNoisy = TimeToFrequencyDomain(aecm,
1557                                            aecm->dBufNoisy,
1558                                            dfw,
1559                                            dfaNoisy,
1560                                            &dfaNoisySum);
1561     aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
1562     aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
1563
1564
1565     if (nearendClean == NULL)
1566     {
1567         ptrDfaClean = dfaNoisy;
1568         aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
1569         aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
1570         dfaCleanSum = dfaNoisySum;
1571     } else
1572     {
1573         // Transform clean near end signal from time domain to frequency domain.
1574         zerosDBufClean = TimeToFrequencyDomain(aecm,
1575                                                aecm->dBufClean,
1576                                                dfw,
1577                                                dfaClean,
1578                                                &dfaCleanSum);
1579         aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
1580         aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
1581     }
1582
1583     // Get the delay
1584     // Save far-end history and estimate delay
1585     UpdateFarHistory(aecm, xfa, far_q);
1586     if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1,
1587                                  far_q) == -1) {
1588       return -1;
1589     }
1590     delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
1591                                             dfaNoisy,
1592                                             PART_LEN1,
1593                                             zerosDBufNoisy);
1594     if (delay == -1)
1595     {
1596         return -1;
1597     }
1598     else if (delay == -2)
1599     {
1600         // If the delay is unknown, we assume zero.
1601         // NOTE: this will have to be adjusted if we ever add lookahead.
1602         delay = 0;
1603     }
1604
1605     if (aecm->fixedDelay >= 0)
1606     {
1607         // Use fixed delay
1608         delay = aecm->fixedDelay;
1609     }
1610
1611     // Get aligned far end spectrum
1612     far_spectrum_ptr = AlignedFarend(aecm, &far_q, delay);
1613     zerosXBuf = (int16_t) far_q;
1614     if (far_spectrum_ptr == NULL)
1615     {
1616         return -1;
1617     }
1618
1619     // Calculate log(energy) and update energy threshold levels
1620     WebRtcAecm_CalcEnergies(aecm,
1621                             far_spectrum_ptr,
1622                             zerosXBuf,
1623                             dfaNoisySum,
1624                             echoEst32);
1625
1626     // Calculate stepsize
1627     mu = WebRtcAecm_CalcStepSize(aecm);
1628
1629     // Update counters
1630     aecm->totCount++;
1631
1632     // This is the channel estimation algorithm.
1633     // It is base on NLMS but has a variable step length, which was calculated above.
1634     WebRtcAecm_UpdateChannel(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisy, mu, echoEst32);
1635     supGain = CalcSuppressionGain(aecm);
1636
1637
1638     // Calculate Wiener filter hnl[]
1639     for (i = 0; i < PART_LEN1; i++)
1640     {
1641         // Far end signal through channel estimate in Q8
1642         // How much can we shift right to preserve resolution
1643         tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
1644         aecm->echoFilt[i] += WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32no1, 50), 8);
1645
1646         zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
1647         zeros16 = WebRtcSpl_NormW16(supGain) + 1;
1648         if (zeros32 + zeros16 > 16)
1649         {
1650             // Multiplication is safe
1651             // Result in Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])
1652             echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
1653                                                     (uint16_t)supGain);
1654             resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
1655             resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
1656         } else
1657         {
1658             tmp16no1 = 17 - zeros32 - zeros16;
1659             resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
1660             resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
1661             if (zeros32 > tmp16no1)
1662             {
1663                 echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
1664                         (uint16_t)WEBRTC_SPL_RSHIFT_W16(supGain,
1665                                 tmp16no1)); // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
1666             } else
1667             {
1668                 // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
1669                 echoEst32Gained = WEBRTC_SPL_UMUL_32_16(
1670                         (uint32_t)WEBRTC_SPL_RSHIFT_W32(aecm->echoFilt[i], tmp16no1),
1671                         (uint16_t)supGain);
1672             }
1673         }
1674
1675         zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
1676         if ((zeros16 < (aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld))
1677                 & (aecm->nearFilt[i]))
1678         {
1679             tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], zeros16);
1680             qDomainDiff = zeros16 - aecm->dfaCleanQDomain + aecm->dfaCleanQDomainOld;
1681         } else
1682         {
1683             tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i],
1684                                             aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld);
1685             qDomainDiff = 0;
1686         }
1687         tmp16no2 = WEBRTC_SPL_SHIFT_W16(ptrDfaClean[i], qDomainDiff);
1688         tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
1689         tmp16no2 = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 4);
1690         tmp16no2 += tmp16no1;
1691         zeros16 = WebRtcSpl_NormW16(tmp16no2);
1692         if ((tmp16no2) & (-qDomainDiff > zeros16))
1693         {
1694             aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
1695         } else
1696         {
1697             aecm->nearFilt[i] = WEBRTC_SPL_SHIFT_W16(tmp16no2, -qDomainDiff);
1698         }
1699
1700         // Wiener filter coefficients, resulting hnl in Q14
1701         if (echoEst32Gained == 0)
1702         {
1703             hnl[i] = ONE_Q14;
1704         } else if (aecm->nearFilt[i] == 0)
1705         {
1706             hnl[i] = 0;
1707         } else
1708         {
1709             // Multiply the suppression gain
1710             // Rounding
1711             echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
1712             tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, (uint16_t)aecm->nearFilt[i]);
1713
1714             // Current resolution is
1715             // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN - max(0, 17 - zeros16 - zeros32))
1716             // Make sure we are in Q14
1717             tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
1718             if (tmp32no1 > ONE_Q14)
1719             {
1720                 hnl[i] = 0;
1721             } else if (tmp32no1 < 0)
1722             {
1723                 hnl[i] = ONE_Q14;
1724             } else
1725             {
1726                 // 1-echoEst/dfa
1727                 hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
1728                 if (hnl[i] < 0)
1729                 {
1730                     hnl[i] = 0;
1731                 }
1732             }
1733         }
1734         if (hnl[i])
1735         {
1736             numPosCoef++;
1737         }
1738     }
1739     // Only in wideband. Prevent the gain in upper band from being larger than
1740     // in lower band.
1741     if (aecm->mult == 2)
1742     {
1743         // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
1744         //               speech distortion in double-talk.
1745         for (i = 0; i < PART_LEN1; i++)
1746         {
1747             hnl[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], hnl[i], 14);
1748         }
1749
1750         for (i = kMinPrefBand; i <= kMaxPrefBand; i++)
1751         {
1752             avgHnl32 += (int32_t)hnl[i];
1753         }
1754         assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
1755         avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
1756
1757         for (i = kMaxPrefBand; i < PART_LEN1; i++)
1758         {
1759             if (hnl[i] > (int16_t)avgHnl32)
1760             {
1761                 hnl[i] = (int16_t)avgHnl32;
1762             }
1763         }
1764     }
1765
1766     // Calculate NLP gain, result is in Q14
1767     if (aecm->nlpFlag)
1768     {
1769         for (i = 0; i < PART_LEN1; i++)
1770         {
1771             // Truncate values close to zero and one.
1772             if (hnl[i] > NLP_COMP_HIGH)
1773             {
1774                 hnl[i] = ONE_Q14;
1775             } else if (hnl[i] < NLP_COMP_LOW)
1776             {
1777                 hnl[i] = 0;
1778             }
1779
1780             // Remove outliers
1781             if (numPosCoef < 3)
1782             {
1783                 nlpGain = 0;
1784             } else
1785             {
1786                 nlpGain = ONE_Q14;
1787             }
1788
1789             // NLP
1790             if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14))
1791             {
1792                 hnl[i] = ONE_Q14;
1793             } else
1794             {
1795                 hnl[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], nlpGain, 14);
1796             }
1797
1798             // multiply with Wiener coefficients
1799             efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
1800                                                                             hnl[i], 14));
1801             efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
1802                                                                             hnl[i], 14));
1803         }
1804     }
1805     else
1806     {
1807         // multiply with Wiener coefficients
1808         for (i = 0; i < PART_LEN1; i++)
1809         {
1810             efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
1811                                                                            hnl[i], 14));
1812             efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
1813                                                                            hnl[i], 14));
1814         }
1815     }
1816
1817     if (aecm->cngMode == AecmTrue)
1818     {
1819         ComfortNoise(aecm, ptrDfaClean, efw, hnl);
1820     }
1821
1822     InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
1823
1824     return 0;
1825 }
1826
1827
1828 // Generate comfort noise and add to output signal.
1829 //
1830 // \param[in]     aecm     Handle of the AECM instance.
1831 // \param[in]     dfa     Absolute value of the nearend signal (Q[aecm->dfaQDomain]).
1832 // \param[in,out] outReal Real part of the output signal (Q[aecm->dfaQDomain]).
1833 // \param[in,out] outImag Imaginary part of the output signal (Q[aecm->dfaQDomain]).
1834 // \param[in]     lambda  Suppression gain with which to scale the noise level (Q14).
1835 //
1836 static void ComfortNoise(AecmCore_t* aecm,
1837                          const uint16_t* dfa,
1838                          complex16_t* out,
1839                          const int16_t* lambda)
1840 {
1841     int16_t i;
1842     int16_t tmp16;
1843     int32_t tmp32;
1844
1845     int16_t randW16[PART_LEN];
1846     int16_t uReal[PART_LEN1];
1847     int16_t uImag[PART_LEN1];
1848     int32_t outLShift32;
1849     int16_t noiseRShift16[PART_LEN1];
1850
1851     int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
1852     int16_t minTrackShift;
1853
1854     assert(shiftFromNearToNoise >= 0);
1855     assert(shiftFromNearToNoise < 16);
1856
1857     if (aecm->noiseEstCtr < 100)
1858     {
1859         // Track the minimum more quickly initially.
1860         aecm->noiseEstCtr++;
1861         minTrackShift = 6;
1862     } else
1863     {
1864         minTrackShift = 9;
1865     }
1866
1867     // Estimate noise power.
1868     for (i = 0; i < PART_LEN1; i++)
1869     {
1870
1871         // Shift to the noise domain.
1872         tmp32 = (int32_t)dfa[i];
1873         outLShift32 = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise);
1874
1875         if (outLShift32 < aecm->noiseEst[i])
1876         {
1877             // Reset "too low" counter
1878             aecm->noiseEstTooLowCtr[i] = 0;
1879             // Track the minimum.
1880             if (aecm->noiseEst[i] < (1 << minTrackShift))
1881             {
1882                 // For small values, decrease noiseEst[i] every
1883                 // |kNoiseEstIncCount| block. The regular approach below can not
1884                 // go further down due to truncation.
1885                 aecm->noiseEstTooHighCtr[i]++;
1886                 if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount)
1887                 {
1888                     aecm->noiseEst[i]--;
1889                     aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
1890                 }
1891             }
1892             else
1893             {
1894                 aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32) >> minTrackShift);
1895             }
1896         } else
1897         {
1898             // Reset "too high" counter
1899             aecm->noiseEstTooHighCtr[i] = 0;
1900             // Ramp slowly upwards until we hit the minimum again.
1901             if ((aecm->noiseEst[i] >> 19) > 0)
1902             {
1903                 // Avoid overflow.
1904                 // Multiplication with 2049 will cause wrap around. Scale
1905                 // down first and then multiply
1906                 aecm->noiseEst[i] >>= 11;
1907                 aecm->noiseEst[i] *= 2049;
1908             }
1909             else if ((aecm->noiseEst[i] >> 11) > 0)
1910             {
1911                 // Large enough for relative increase
1912                 aecm->noiseEst[i] *= 2049;
1913                 aecm->noiseEst[i] >>= 11;
1914             }
1915             else
1916             {
1917                 // Make incremental increases based on size every
1918                 // |kNoiseEstIncCount| block
1919                 aecm->noiseEstTooLowCtr[i]++;
1920                 if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount)
1921                 {
1922                     aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1;
1923                     aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
1924                 }
1925             }
1926         }
1927     }
1928
1929     for (i = 0; i < PART_LEN1; i++)
1930     {
1931         tmp32 = WEBRTC_SPL_RSHIFT_W32(aecm->noiseEst[i], shiftFromNearToNoise);
1932         if (tmp32 > 32767)
1933         {
1934             tmp32 = 32767;
1935             aecm->noiseEst[i] = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise);
1936         }
1937         noiseRShift16[i] = (int16_t)tmp32;
1938
1939         tmp16 = ONE_Q14 - lambda[i];
1940         noiseRShift16[i]
1941                 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, noiseRShift16[i], 14);
1942     }
1943
1944     // Generate a uniform random array on [0 2^15-1].
1945     WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
1946
1947     // Generate noise according to estimated energy.
1948     uReal[0] = 0; // Reject LF noise.
1949     uImag[0] = 0;
1950     for (i = 1; i < PART_LEN1; i++)
1951     {
1952         // Get a random index for the cos and sin tables over [0 359].
1953         tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(359, randW16[i - 1], 15);
1954
1955         // Tables are in Q13.
1956         uReal[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(noiseRShift16[i],
1957                 kCosTable[tmp16], 13);
1958         uImag[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(-noiseRShift16[i],
1959                 kSinTable[tmp16], 13);
1960     }
1961     uImag[PART_LEN] = 0;
1962
1963     for (i = 0; i < PART_LEN1; i++)
1964     {
1965         out[i].real = WEBRTC_SPL_ADD_SAT_W16(out[i].real, uReal[i]);
1966         out[i].imag = WEBRTC_SPL_ADD_SAT_W16(out[i].imag, uImag[i]);
1967     }
1968 }
1969
1970 void WebRtcAecm_BufferFarFrame(AecmCore_t* const aecm,
1971                                const int16_t* const farend,
1972                                const int farLen)
1973 {
1974     int writeLen = farLen, writePos = 0;
1975
1976     // Check if the write position must be wrapped
1977     while (aecm->farBufWritePos + writeLen > FAR_BUF_LEN)
1978     {
1979         // Write to remaining buffer space before wrapping
1980         writeLen = FAR_BUF_LEN - aecm->farBufWritePos;
1981         memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos,
1982                sizeof(int16_t) * writeLen);
1983         aecm->farBufWritePos = 0;
1984         writePos = writeLen;
1985         writeLen = farLen - writeLen;
1986     }
1987
1988     memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos,
1989            sizeof(int16_t) * writeLen);
1990     aecm->farBufWritePos += writeLen;
1991 }
1992
1993 void WebRtcAecm_FetchFarFrame(AecmCore_t * const aecm, int16_t * const farend,
1994                               const int farLen, const int knownDelay)
1995 {
1996     int readLen = farLen;
1997     int readPos = 0;
1998     int delayChange = knownDelay - aecm->lastKnownDelay;
1999
2000     aecm->farBufReadPos -= delayChange;
2001
2002     // Check if delay forces a read position wrap
2003     while (aecm->farBufReadPos < 0)
2004     {
2005         aecm->farBufReadPos += FAR_BUF_LEN;
2006     }
2007     while (aecm->farBufReadPos > FAR_BUF_LEN - 1)
2008     {
2009         aecm->farBufReadPos -= FAR_BUF_LEN;
2010     }
2011
2012     aecm->lastKnownDelay = knownDelay;
2013
2014     // Check if read position must be wrapped
2015     while (aecm->farBufReadPos + readLen > FAR_BUF_LEN)
2016     {
2017
2018         // Read from remaining buffer space before wrapping
2019         readLen = FAR_BUF_LEN - aecm->farBufReadPos;
2020         memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos,
2021                sizeof(int16_t) * readLen);
2022         aecm->farBufReadPos = 0;
2023         readPos = readLen;
2024         readLen = farLen - readLen;
2025     }
2026     memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos,
2027            sizeof(int16_t) * readLen);
2028     aecm->farBufReadPos += readLen;
2029 }