2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
13 #include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
14 #include "webrtc/modules/audio_processing/ns/nsx_core.h"
16 static const int16_t kIndicatorTable[17] = {
17 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
18 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
21 // Compute speech/noise probability
22 // speech/noise probability is returned in: probSpeechFinal
23 //snrLocPrior is the prior SNR for each frequency (in Q11)
24 //snrLocPost is the post SNR for each frequency (in Q11)
25 void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
26 uint16_t* nonSpeechProbFinal,
27 uint32_t* priorLocSnr,
28 uint32_t* postLocSnr) {
30 uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
31 int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
32 int32_t frac32, logTmp;
33 int32_t logLrtTimeAvgKsumFX;
35 int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
36 int i, normTmp, normTmp2, nShifts;
38 // compute feature based on average LR factor
39 // this is the average over all frequencies of the smooth log LRT
40 logLrtTimeAvgKsumFX = 0;
41 for (i = 0; i < inst->magnLen; i++) {
42 besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
43 normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
44 num = postLocSnr[i] << normTmp; // Q(11+normTmp)
46 den = priorLocSnr[i] << (normTmp - 11); // Q(normTmp)
48 den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp)
51 besselTmpFX32 -= num / den; // Q11
56 // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior)
57 // - inst->logLrtTimeAvg[i]);
58 // Here, LRT_TAVG = 0.5
59 zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
60 frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
61 tmp32 = WEBRTC_SPL_MUL(frac32, frac32);
62 tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19);
63 tmp32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)frac32, 5412, 12);
65 // tmp32 = log2(priorLocSnr[i])
66 tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12
67 logTmp = (tmp32 * 178) >> 8; // log2(priorLocSnr[i])*log(2)
68 tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1);
70 inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12
72 logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12
74 inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5,
76 // 5 = BIN_SIZE_LRT / 2
77 // done with computation of LR factor
80 //compute the indicator functions
83 // average LRT feature
85 // indicator0 = 0.5 * (tanh(widthPrior *
86 // (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
87 tmpIndFX = 16384; // Q14(1.0)
88 tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
89 nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
90 //use larger width in tanh map for pause regions
94 //widthPrior = widthPrior * 2.0;
97 tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
98 // compute indicator function: sigmoid map
99 tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
100 if ((tableIndex < 16) && (tableIndex >= 0)) {
101 tmp16no2 = kIndicatorTable[tableIndex];
102 tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
103 frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
104 tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
106 tmpIndFX = 8192 - tmp16no2; // Q14
108 tmpIndFX = 8192 + tmp16no2; // Q14
111 indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14
113 //spectral flatness feature
114 if (inst->weightSpecFlat) {
115 tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
116 tmpIndFX = 16384; // Q14(1.0)
117 //use larger width in tanh map for pause regions
118 tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
120 if (inst->thresholdSpecFlat < tmpU32no1) {
122 tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
123 //widthPrior = widthPrior * 2.0;
126 tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); // Q14
127 // compute indicator function: sigmoid map
129 // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
130 // (threshPrior1 - tmpFloat1)) + 1.0);
131 tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
132 if (tableIndex < 16) {
133 tmp16no2 = kIndicatorTable[tableIndex];
134 tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
135 frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
136 tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
138 tmpIndFX = 8192 + tmp16no2; // Q14
140 tmpIndFX = 8192 - tmp16no2; // Q14
143 indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
146 //for template spectral-difference
147 if (inst->weightSpecDiff) {
149 if (inst->featureSpecDiff) {
150 normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
151 WebRtcSpl_NormU32(inst->featureSpecDiff));
152 assert(normTmp >= 0);
153 tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages)
154 tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy,
155 20 - inst->stages - normTmp);
157 // Q(20 - inst->stages)
158 tmpU32no1 /= tmpU32no2;
160 tmpU32no1 = (uint32_t)(0x7fffffff);
163 tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
164 tmpU32no2 = tmpU32no1 - tmpU32no3;
166 tmpIndFX = 16384; // Q14(1.0)
167 //use larger width in tanh map for pause regions
168 if (tmpU32no2 & 0x80000000) {
170 tmpU32no2 = tmpU32no3 - tmpU32no1;
171 //widthPrior = widthPrior * 2.0;
174 tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
175 // compute indicator function: sigmoid map
177 indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
179 tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
180 if (tableIndex < 16) {
181 tmp16no2 = kIndicatorTable[tableIndex];
182 tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
183 frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
184 tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
187 tmpIndFX = 8192 + tmp16no2;
189 tmpIndFX = 8192 - tmp16no2;
192 indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
195 //combine the indicator function with the feature weights
197 // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
198 // indicator1 + weightIndPrior2 * indicator2);
199 indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
200 // done with computing indicator function
202 //compute the prior probability
204 // inst->priorNonSpeechProb += PRIOR_UPDATE *
205 // (indPriorNonSpeech - inst->priorNonSpeechProb);
206 tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
207 inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
208 PRIOR_UPDATE_Q14, tmp16, 14); // Q14
210 //final speech probability: combine prior model with LR factor:
212 memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
214 if (inst->priorNonSpeechProb > 0) {
215 for (i = 0; i < inst->magnLen; i++) {
217 // invLrt = exp(inst->logLrtTimeAvg[i]);
218 // invLrt = inst->priorSpeechProb * invLrt;
219 // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) /
220 // (1.0 - inst->priorSpeechProb + invLrt);
221 // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
222 // nonSpeechProbFinal[i] = inst->priorNonSpeechProb /
223 // (inst->priorNonSpeechProb + invLrt);
224 if (inst->logLrtTimeAvgW32[i] < 65300) {
225 tmp32no1 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(
226 inst->logLrtTimeAvgW32[i], 23637),
228 intPart = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12);
232 frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12
234 // Quadratic approximation of 2^frac
235 tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12
236 tmp32no2 += WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12
237 invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart)
238 + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8
240 normTmp = WebRtcSpl_NormW32(invLrtFX);
241 normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
242 if (normTmp + normTmp2 >= 7) {
243 if (normTmp + normTmp2 < 15) {
244 invLrtFX = WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp);
245 // Q(normTmp+normTmp2-7)
246 tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb);
247 // Q(normTmp+normTmp2+7)
248 invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2);
251 tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb);
253 invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14
256 tmp32no1 = WEBRTC_SPL_LSHIFT_W32((int32_t)inst->priorNonSpeechProb,
259 nonSpeechProbFinal[i] = tmp32no1 /
260 (inst->priorNonSpeechProb + invLrtFX); // Q8