Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / third_party / webrtc / modules / audio_processing / ns / nsx_core_mips.c
1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11 #include <assert.h>
12
13 #include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
14 #include "webrtc/modules/audio_processing/ns/nsx_core.h"
15
16 static const int16_t kIndicatorTable[17] = {
   // Lookup table for the sigmoid (tanh-based) indicator maps in
   // WebRtcNsx_SpeechNoiseProb(). It is read at tableIndex and tableIndex + 1
   // with tableIndex in 0..15 (hence 17 entries); the linearly interpolated
   // value is added to or subtracted from 8192 (0.5 when 16384 is Q14 1.0).
17   0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
18   7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
19 };
20
21 // Compute the speech/noise probability (MIPS inline-assembly version).
22 // The per-frequency non-speech probability is returned in: nonSpeechProbFinal
23 // priorLocSnr is the prior SNR for each frequency (in Q11)
24 // postLocSnr is the post SNR for each frequency (in Q11)
   //
   // Besides filling nonSpeechProbFinal (inst->magnLen entries, cleared to 0
   // first), this function updates inst->logLrtTimeAvgW32[],
   // inst->featureLogLrt and inst->priorNonSpeechProb.
25 void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
26                                uint16_t* nonSpeechProbFinal,
27                                uint32_t* priorLocSnr,
28                                uint32_t* postLocSnr) {
29
30   uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
31   int32_t indPriorFX, tmp32no1;
32   int32_t logLrtTimeAvgKsumFX;
33   int16_t indPriorFX16;
34   int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac;
35   int i, normTmp, nShifts;
36
     // Scratch values (r0..r9) and constants that are passed to the inline
     // assembly blocks below as register operands.
37   int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
38   int32_t const_max = 0x7fffffff;
39   int32_t const_neg43 = -43;
40   int32_t const_5412 = 5412;
     // Note: despite the "rsh" in the name, the value is 11 << 12 (a left shift).
41   int32_t const_11rsh12 = (11 << 12);
42   int32_t const_178 = 178;
43
44
45   // compute feature based on average LR factor
46   // this is the average over all frequencies of the smooth log LRT
47   logLrtTimeAvgKsumFX = 0;
48   for (i = 0; i < inst->magnLen; i++) {
49     r0 = postLocSnr[i]; // Q11
50     r1 = priorLocSnr[i];
51     r2 = inst->logLrtTimeAvgW32[i];
52
       // In/out operands: r0 (post SNR), r1 (prior SNR), r2 (smoothed log LRT).
       // On exit r2 holds the updated value for bin i; r3..r8 are scratch.
53     __asm __volatile(
54       ".set       push                                    \n\t"
55       ".set       noreorder                               \n\t"
         // r3 = leading zeros of the post SNR, r5 = leading zeros of the prior SNR.
56       "clz        %[r3],    %[r0]                         \n\t"
57       "clz        %[r5],    %[r1]                         \n\t"
         // A count of 32 (the input was 0) is replaced by a shift count of 0.
58       "slti       %[r4],    %[r3],    32                  \n\t"
59       "slti       %[r6],    %[r5],    32                  \n\t"
60       "movz       %[r3],    $0,       %[r4]               \n\t"
61       "movz       %[r5],    $0,       %[r6]               \n\t"
         // r6 = denominator: prior SNR shifted right by (11 - r3) when r3 < 11,
         // otherwise shifted left by (r3 - 11). Both candidates are computed and
         // movn selects one.
62       "slti       %[r4],    %[r3],    11                  \n\t"
63       "addiu      %[r6],    %[r3],    -11                 \n\t"
64       "neg        %[r7],    %[r6]                         \n\t"
65       "sllv       %[r6],    %[r1],    %[r6]               \n\t"
66       "srav       %[r7],    %[r1],    %[r7]               \n\t"
67       "movn       %[r6],    %[r7],    %[r4]               \n\t"
         // r1 = the 12 bits below the top bit of the normalized prior SNR
         // (prior << r5, top bit masked off, >> 19): the fractional part.
68       "sllv       %[r1],    %[r1],    %[r5]               \n\t"
69       "and        %[r1],    %[r1],    %[const_max]        \n\t"
70       "sra        %[r1],    %[r1],    19                  \n\t"
         // r7 = frac * frac (first step of the quadratic polynomial below).
71       "mul        %[r7],    %[r1],    %[r1]               \n\t"
         // r3 = numerator (post SNR << r3); r8 = numerator / denominator, unsigned.
         // NOTE(review): r6 can be 0 here. The quotient is only used when r6 >= 1,
         // but confirm the assembler's expansion of three-operand divu does not
         // trap on a zero divisor.
72       "sllv       %[r3],    %[r0],    %[r3]               \n\t"
73       "divu       %[r8],    %[r3],    %[r6]               \n\t"
         // r6 = 1 if the denominator is < 1 (signed compare), else 0.
74       "slti       %[r6],    %[r6],    1                   \n\t"
         // r7 = (frac * frac * -43) >> 19
75       "mul        %[r7],    %[r7],    %[const_neg43]      \n\t"
76       "sra        %[r7],    %[r7],    19                  \n\t"
         // r0 = post SNR - quotient when the denominator is >= 1, otherwise 0.
77       "movz       %[r3],    %[r8],    %[r6]               \n\t"
78       "subu       %[r0],    %[r0],    %[r3]               \n\t"
79       "movn       %[r0],    $0,       %[r6]               \n\t"
         // r1 = ((frac * frac * -43) >> 19) + ((frac * 5412) >> 12) + 37
80       "mul        %[r1],    %[r1],    %[const_5412]       \n\t"
81       "sra        %[r1],    %[r1],    12                  \n\t"
82       "addu       %[r7],    %[r7],    %[r1]               \n\t"
83       "addiu      %[r1],    %[r7],    37                  \n\t"
         // r5 = ((31 - r5) << 12) + r1; presumably a Q12 log2 approximation of
         // the prior SNR (integer part from the leading-zero count, fraction
         // from the polynomial) - TODO confirm against the generic C version.
84       "addiu      %[r5],    %[r5],    -31                 \n\t"
85       "neg        %[r5],    %[r5]                         \n\t"
86       "sll        %[r5],    %[r5],    12                  \n\t"
87       "addu       %[r5],    %[r5],    %[r1]               \n\t"
         // r7 = ((r5 - (11 << 12)) * 178) >> 8. The 11 << 12 offset presumably
         // removes the Q11 scaling of the input and 178/256 (~0.695) presumably
         // converts log2 to a natural logarithm - TODO confirm.
88       "subu       %[r7],    %[r5],    %[const_11rsh12]    \n\t"
89       "mul        %[r7],    %[r7],    %[const_178]        \n\t"
90       "sra        %[r7],    %[r7],    8                   \n\t"
         // r2 = r2 - ((r7 + r2) >> 1) + r0
91       "addu       %[r7],    %[r7],    %[r2]               \n\t"
92       "sra        %[r7],    %[r7],    1                   \n\t"
93       "subu       %[r2],    %[r2],    %[r7]               \n\t"
94       "addu       %[r2],    %[r2],    %[r0]               \n\t"
95       ".set       pop                                     \n\t"
96       : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
97         [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
98         [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8)
99       : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43),
100         [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12),
101         [const_178] "r" (const_178)
102       : "hi", "lo"
103     );
        // Store the updated per-bin value and accumulate it over all bins.
104     inst->logLrtTimeAvgW32[i] = r2;
105     logLrtTimeAvgKsumFX += r2;
106   }
107
108   inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5,
109                                               inst->stages + 10);
110                                                   // 5 = BIN_SIZE_LRT / 2
111   // done with computation of LR factor
112
113   //
114   // compute the indicator functions
115   //
      // Each enabled feature below produces an indicator tmpIndFX, which is
      // multiplied by its weight and accumulated in indPriorFX.
116
117   // average LRT feature
118   // FLOAT code
119   // indicator0 = 0.5 * (tanh(widthPrior *
120   //                      (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
      // tmpIndFX doubles as a sign flag until the table lookup: 16384 means the
      // difference is non-negative, 0 means it was negative. If the table index
      // is out of range, tmpIndFX keeps that value (16384 or 0).
121   tmpIndFX = 16384; // Q14(1.0)
122   tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
123   nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
124   //use larger width in tanh map for pause regions
125   if (tmp32no1 < 0) {
126     tmpIndFX = 0;
127     tmp32no1 = -tmp32no1;
128     //widthPrior = widthPrior * 2.0;
129     nShifts++;
130   }
      // WEBRTC_SPL_SHIFT_W32 is defined outside this file; presumably it shifts
      // left for a positive count and right for a negative one - TODO confirm.
131   tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
132   // compute indicator function: sigmoid map
      // Integer part (bits 14 and up) selects the table entry; the low 14 bits
      // are the interpolation fraction between that entry and the next one.
133   tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
134   if ((tableIndex < 16) && (tableIndex >= 0)) {
135     tmp16no2 = kIndicatorTable[tableIndex];
136     tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
137     frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
138     tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
139     if (tmpIndFX == 0) {
140       tmpIndFX = 8192 - tmp16no2; // Q14
141     } else {
142       tmpIndFX = 8192 + tmp16no2; // Q14
143     }
144   }
145   indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14
146
147   //spectral flatness feature
      // Skipped entirely when its weight is 0.
148   if (inst->weightSpecFlat) {
149     tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
150     tmpIndFX = 16384; // Q14(1.0)
151     //use larger width in tanh map for pause regions
        // Unsigned magnitude of (threshold - feature); the sign is tracked in
        // tmpIndFX (0 when the feature exceeds the threshold).
152     tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
153     nShifts = 4;
154     if (inst->thresholdSpecFlat < tmpU32no1) {
155       tmpIndFX = 0;
156       tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
157       //widthPrior = widthPrior * 2.0;
158       nShifts++;
159     }
160     tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25);  //Q14
161     // compute indicator function: sigmoid map
162     // FLOAT code
163     // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
164     //                          (threshPrior1 - tmpFloat1)) + 1.0);
165     tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
166     if (tableIndex < 16) {
167       tmp16no2 = kIndicatorTable[tableIndex];
168       tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
169       frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
170       tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
171       if (tmpIndFX) {
172         tmpIndFX = 8192 + tmp16no2; // Q14
173       } else {
174         tmpIndFX = 8192 - tmp16no2; // Q14
175       }
176     }
177     indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
178   }
179
180   //for template spectral-difference
      // Skipped entirely when its weight is 0.
181   if (inst->weightSpecDiff) {
182     tmpU32no1 = 0;
183     if (inst->featureSpecDiff) {
          // Normalize featureSpecDiff by at most (20 - stages) bits, then divide
          // by timeAvgMagnEnergy shifted right by the remaining bits. A zero
          // divisor saturates the result to 0x7fffffff.
184       normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
185                                WebRtcSpl_NormU32(inst->featureSpecDiff));
186       assert(normTmp >= 0);
187       tmpU32no1 = inst->featureSpecDiff << normTmp;  // Q(normTmp-2*stages)
188       tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy,
189                                         20 - inst->stages - normTmp);
190       if (tmpU32no2 > 0) {
191         // Q(20 - inst->stages)
192         tmpU32no1 /= tmpU32no2;
193       } else {
194         tmpU32no1 = (uint32_t)(0x7fffffff);
195       }
196     }
197     tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
198     tmpU32no2 = tmpU32no1 - tmpU32no3;
199     nShifts = 1;
200     tmpIndFX = 16384; // Q14(1.0)
201     //use larger width in tanh map for pause regions
        // The subtraction above is unsigned; a set top bit is treated as a
        // negative difference, in which case the magnitude is recomputed and
        // one less right shift is applied.
202     if (tmpU32no2 & 0x80000000) {
203       tmpIndFX = 0;
204       tmpU32no2 = tmpU32no3 - tmpU32no1;
205       //widthPrior = widthPrior * 2.0;
206       nShifts--;
207     }
208     tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
209     // compute indicator function: sigmoid map
210     /* FLOAT code
211      indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
212      */
213     tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
214     if (tableIndex < 16) {
215       tmp16no2 = kIndicatorTable[tableIndex];
216       tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
217       frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
          // Unlike the two features above, this interpolation uses the
          // _WITH_ROUND variant of the multiply-and-shift macro.
218       tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
219                     tmp16no1, frac, 14);
220       if (tmpIndFX) {
221         tmpIndFX = 8192 + tmp16no2;
222       } else {
223         tmpIndFX = 8192 - tmp16no2;
224       }
225     }
226     indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
227   }
228
229   //combine the indicator function with the feature weights
230   // FLOAT code
231   // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
232   //                 indicator1 + weightIndPrior2 * indicator2);
      // 98307 = 6 * 16384 + 3, i.e. 1.0 in the 6*Q14 scale plus 3 (presumably a
      // rounding offset for the division by 6 - TODO confirm).
233   indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
234   // done with computing indicator function
235
236   //compute the prior probability
237   // FLOAT code
238   // inst->priorNonSpeechProb += PRIOR_UPDATE *
239   //                             (indPriorNonSpeech - inst->priorNonSpeechProb);
240   tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
241   inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
242                                 PRIOR_UPDATE_Q14, tmp16, 14); // Q14
243
244   //final speech probability: combine prior model with LR factor:
245
      // Start from all zeros; bins that are skipped below keep the value 0.
246   memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
247
248   if (inst->priorNonSpeechProb > 0) {
        // r0 = prior non-speech probability, r1 = 16384 - r0 (its complement
        // with respect to Q14 1.0). Both are read-only inputs of the asm block.
249     r0 = inst->priorNonSpeechProb;
250     r1 = 16384 - r0;
251     int32_t const_23637 = 23637;
252     int32_t const_44 = 44;
253     int32_t const_84 = 84;
254     int32_t const_1 = 1;
255     int32_t const_neg8 = -8;
256     for (i = 0; i < inst->magnLen; i++) {
257       r2 = inst->logLrtTimeAvgW32[i];
          // Bins whose smoothed log LRT is >= 65300 are skipped (output stays 0).
258       if (r2 < 65300) {
            // r2 is in/out scratch here; the result is left in r3.
259         __asm __volatile(
260           ".set         push                                      \n\t"
261           ".set         noreorder                                 \n\t"
              // r2 = logLrt * 23637 (shifted right by 14 further down).
262           "mul          %[r2],    %[r2],          %[const_23637]  \n\t"
              // r7 = number of leading bits of (r1 << 16) equal to its sign bit:
              // clz for a non-negative value, clo for a negative one.
263           "sll          %[r6],    %[r1],          16              \n\t"
264           "clz          %[r7],    %[r6]                           \n\t"
265           "clo          %[r8],    %[r6]                           \n\t"
266           "slt          %[r9],    %[r6],          $0              \n\t"
267           "movn         %[r7],    %[r8],          %[r9]           \n\t"
              // r2 = (logLrt * 23637) >> 14; r3 = its low 12 bits (fraction).
268           "sra          %[r2],    %[r2],          14              \n\t"
269           "andi         %[r3],    %[r2],          0xfff           \n\t"
              // r4 = frac * frac, r3 = frac * 84 (terms of a quadratic in frac).
270           "mul          %[r4],    %[r3],          %[r3]           \n\t"
271           "mul          %[r3],    %[r3],          %[const_84]     \n\t"
              // r2 = integer part (>> 12), clamped from below at -8.
272           "sra          %[r2],    %[r2],          12              \n\t"
273           "slt          %[r5],    %[r2],          %[const_neg8]   \n\t"
274           "movn         %[r2],    %[const_neg8],  %[r5]           \n\t"
              // r4 = frac * frac * 44, r3 = (frac * 84) >> 7.
275           "mul          %[r4],    %[r4],          %[const_44]     \n\t"
276           "sra          %[r3],    %[r3],          7               \n\t"
              // r7 = leading-sign-bit count - 1; a result >= 31 (the shifted
              // value was 0) is replaced by 0.
277           "addiu        %[r7],    %[r7],          -1              \n\t"
278           "slti         %[r9],    %[r7],          31              \n\t"
279           "movz         %[r7],    $0,             %[r9]           \n\t"
              // r4 = ((frac * frac * 44) >> 19) + ((frac * 84) >> 7)
280           "sra          %[r4],    %[r4],          19              \n\t"
281           "addu         %[r4],    %[r4],          %[r3]           \n\t"
              // r6 = r4 shifted by (intPart - 4): left when that count is >= 0,
              // right by (4 - intPart) otherwise. r3 = intPart + 8.
282           "addiu        %[r3],    %[r2],          8               \n\t"
283           "addiu        %[r2],    %[r2],          -4              \n\t"
284           "neg          %[r5],    %[r2]                           \n\t"
285           "sllv         %[r6],    %[r4],          %[r2]           \n\t"
286           "srav         %[r5],    %[r4],          %[r5]           \n\t"
287           "slt          %[r2],    %[r2],          $0              \n\t"
288           "movn         %[r6],    %[r5],          %[r2]           \n\t"
              // r2 = (1 << (intPart + 8)) + r6
289           "sllv         %[r3],    %[const_1],     %[r3]           \n\t"
290           "addu         %[r2],    %[r3],          %[r6]           \n\t"
              // r4 = (leading-sign-bit count of r2) - 1.
291           "clz          %[r4],    %[r2]                           \n\t"
292           "clo          %[r5],    %[r2]                           \n\t"
293           "slt          %[r8],    %[r2],          $0              \n\t"
294           "movn         %[r4],    %[r5],          %[r8]           \n\t"
295           "addiu        %[r4],    %[r4],          -1              \n\t"
              // r4 is forced to 0 when r2 is not positive and r7 is 0.
296           "slt          %[r5],    $0,             %[r2]           \n\t"
297           "or           %[r5],    %[r5],          %[r7]           \n\t"
298           "movz         %[r4],    $0,             %[r5]           \n\t"
              // r6 = r7 + r4 - 7; leave the block early when it is negative
              // (the nop fills the branch delay slot).
              // NOTE(review): on this early exit r3 still holds
              // 1 << (intPart + 8) from above and is stored (truncated to
              // uint16_t) after the asm block - confirm this is intended.
299           "addiu        %[r6],    %[r7],          -7              \n\t"
300           "addu         %[r6],    %[r6],          %[r4]           \n\t"
301           "bltz         %[r6],    1f                              \n\t"
302           " nop                                                   \n\t"
              // With s = r7 + r4 (so r6 = s - 7, r4 = s - 15):
              //   s < 15 : r2 = ((r2 >> (15 - s)) * r1) >> (s - 7)
              //   else   : r2 = (r2 * r1) >> 8
              // Both candidates are computed; movn selects one.
303           "addiu        %[r4],    %[r6],          -8              \n\t"
304           "neg          %[r3],    %[r4]                           \n\t"
305           "srav         %[r5],    %[r2],          %[r3]           \n\t"
306           "mul          %[r5],    %[r5],          %[r1]           \n\t"
307           "mul          %[r2],    %[r2],          %[r1]           \n\t"
308           "slt          %[r4],    %[r4],          $0              \n\t"
309           "srav         %[r5],    %[r5],          %[r6]           \n\t"
310           "sra          %[r2],    %[r2],          8               \n\t"
311           "movn         %[r2],    %[r5],          %[r4]           \n\t"
              // r3 = (r0 << 8) / (r0 + r2), unsigned division.
312           "sll          %[r3],    %[r0],          8               \n\t"
313           "addu         %[r2],    %[r0],          %[r2]           \n\t"
314           "divu         %[r3],    %[r3],          %[r2]           \n\t"
315          "1:                                                      \n\t"
316           ".set         pop                                       \n\t"
317           : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4),
318             [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
319             [r8] "=&r" (r8), [r9] "=&r" (r9)
320           : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637),
321             [const_neg8] "r" (const_neg8), [const_84] "r" (const_84),
322             [const_1] "r" (const_1), [const_44] "r" (const_44)
323           : "hi", "lo"
324         );
            // The 32-bit r3 is narrowed to uint16_t on assignment.
325         nonSpeechProbFinal[i] = r3;
326       }
327     }
328   }
329 }
330
331 // Update analysis buffer for lower band, and window data before FFT.
    //
    // inst->analysisBuffer is advanced by inst->blockLen10ms samples, new_speech
    // is placed at its end, and inst->anaLen windowed samples
    // (window[i] * analysisBuffer[i], scaled down by 14 bits with rounding) are
    // written to out. Three variants are selected at compile time: MIPS DSP R2,
    // MIPS DSP R1, and plain MIPS32.
332 void WebRtcNsx_AnalysisUpdate_mips(NsxInst_t* inst,
333                                    int16_t* out,
334                                    int16_t* new_speech) {
335
336   int iters, after;
337   int anaLen = inst->anaLen;
      // The buffers are accessed through int* so that the DSP variant can use
      // 32-bit loads/stores; the asm advances these pointers in bytes.
      // NOTE(review): the lw/sw accesses presumably require the underlying
      // 16-bit buffers (including out) to be 4-byte aligned - confirm.
338   int *window = (int*)inst->window;
339   int *anaBuf = (int*)inst->analysisBuffer;
340   int *outBuf = (int*)out;
341   int r0, r1, r2, r3, r4, r5, r6, r7;
342 #if defined(MIPS_DSP_R1_LE)
343   int r8;
344 #endif
345
346   // For lower band update analysis buffer.
      // WEBRTC_SPL_MEMCPY_W16 is defined outside this file; presumably it copies
      // the given number of 16-bit elements - TODO confirm. First the newest
      // (anaLen - blockLen10ms) samples are moved to the front, then new_speech
      // fills the tail.
347   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer,
348                         inst->analysisBuffer + inst->blockLen10ms,
349                         inst->anaLen - inst->blockLen10ms);
350   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer
351       + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms);
352
353   // Window data before FFT.
354 #if defined(MIPS_DSP_R1_LE)
355   __asm __volatile(
356     ".set              push                                \n\t"
357     ".set              noreorder                           \n\t"
        // Main loop: iters = anaLen >> 3 passes, 8 samples (four words from each
        // of window and anaBuf) per pass.
358     "sra               %[iters],   %[anaLen],    3         \n\t"
359    "1:                                                     \n\t"
360     "blez              %[iters],   2f                      \n\t"
361     " nop                                                  \n\t"
362     "lw                %[r0],      0(%[window])            \n\t"
363     "lw                %[r1],      0(%[anaBuf])            \n\t"
364     "lw                %[r2],      4(%[window])            \n\t"
365     "lw                %[r3],      4(%[anaBuf])            \n\t"
366     "lw                %[r4],      8(%[window])            \n\t"
367     "lw                %[r5],      8(%[anaBuf])            \n\t"
368     "lw                %[r6],      12(%[window])           \n\t"
369     "lw                %[r7],      12(%[anaBuf])           \n\t"
        // Per word pair: .phl multiplies the left halfwords, .phr the right
        // halfwords (DSP ASE fractional multiplies). Products end up in
        // r8/r0, r1/r2, r3/r4 and r5/r6 (left/right).
370     "muleq_s.w.phl     %[r8],      %[r0],        %[r1]     \n\t"
371     "muleq_s.w.phr     %[r0],      %[r0],        %[r1]     \n\t"
372     "muleq_s.w.phl     %[r1],      %[r2],        %[r3]     \n\t"
373     "muleq_s.w.phr     %[r2],      %[r2],        %[r3]     \n\t"
374     "muleq_s.w.phl     %[r3],      %[r4],        %[r5]     \n\t"
375     "muleq_s.w.phr     %[r4],      %[r4],        %[r5]     \n\t"
376     "muleq_s.w.phl     %[r5],      %[r6],        %[r7]     \n\t"
377     "muleq_s.w.phr     %[r6],      %[r6],        %[r7]     \n\t"
378 #if defined(MIPS_DSP_R2_LE)
        // DSP R2: one instruction shifts both products right by 15 with
        // rounding and packs them into a single word of two halfwords.
379     "precr_sra_r.ph.w  %[r8],      %[r0],        15        \n\t"
380     "precr_sra_r.ph.w  %[r1],      %[r2],        15        \n\t"
381     "precr_sra_r.ph.w  %[r3],      %[r4],        15        \n\t"
382     "precr_sra_r.ph.w  %[r5],      %[r6],        15        \n\t"
383     "sw                %[r8],      0(%[outBuf])            \n\t"
384     "sw                %[r1],      4(%[outBuf])            \n\t"
385     "sw                %[r3],      8(%[outBuf])            \n\t"
386     "sw                %[r5],      12(%[outBuf])           \n\t"
387 #else
        // DSP R1: round-shift each product right by 15 individually, move the
        // right-halfword results into the upper half (sll 16), then packrl.ph
        // combines each left/right pair into one word.
388     "shra_r.w          %[r8],      %[r8],        15        \n\t"
389     "shra_r.w          %[r0],      %[r0],        15        \n\t"
390     "shra_r.w          %[r1],      %[r1],        15        \n\t"
391     "shra_r.w          %[r2],      %[r2],        15        \n\t"
392     "shra_r.w          %[r3],      %[r3],        15        \n\t"
393     "shra_r.w          %[r4],      %[r4],        15        \n\t"
394     "shra_r.w          %[r5],      %[r5],        15        \n\t"
395     "shra_r.w          %[r6],      %[r6],        15        \n\t"
396     "sll               %[r0],      %[r0],        16        \n\t"
397     "sll               %[r2],      %[r2],        16        \n\t"
398     "sll               %[r4],      %[r4],        16        \n\t"
399     "sll               %[r6],      %[r6],        16        \n\t"
400     "packrl.ph         %[r0],      %[r8],        %[r0]     \n\t"
401     "packrl.ph         %[r2],      %[r1],        %[r2]     \n\t"
402     "packrl.ph         %[r4],      %[r3],        %[r4]     \n\t"
403     "packrl.ph         %[r6],      %[r5],        %[r6]     \n\t"
404     "sw                %[r0],      0(%[outBuf])            \n\t"
405     "sw                %[r2],      4(%[outBuf])            \n\t"
406     "sw                %[r4],      8(%[outBuf])            \n\t"
407     "sw                %[r6],      12(%[outBuf])           \n\t"
408 #endif
        // Advance all three pointers by 16 bytes; the loop counter is
        // decremented in the delay slot of the backward branch.
409     "addiu             %[window],  %[window],    16        \n\t"
410     "addiu             %[anaBuf],  %[anaBuf],    16        \n\t"
411     "addiu             %[outBuf],  %[outBuf],    16        \n\t"
412     "b                 1b                                  \n\t"
413     " addiu            %[iters],   %[iters],     -1        \n\t"
414    "2:                                                     \n\t"
        // Tail loop: the remaining (anaLen & 7) samples, one halfword at a time:
        // plain mul followed by a rounding right shift of 14.
415     "andi              %[after],   %[anaLen],    7         \n\t"
416    "3:                                                     \n\t"
417     "blez              %[after],   4f                      \n\t"
418     " nop                                                  \n\t"
419     "lh                %[r0],      0(%[window])            \n\t"
420     "lh                %[r1],      0(%[anaBuf])            \n\t"
421     "mul               %[r0],      %[r0],        %[r1]     \n\t"
422     "addiu             %[window],  %[window],    2         \n\t"
423     "addiu             %[anaBuf],  %[anaBuf],    2         \n\t"
424     "addiu             %[outBuf],  %[outBuf],    2         \n\t"
425     "shra_r.w          %[r0],      %[r0],        14        \n\t"
426     "sh                %[r0],      -2(%[outBuf])           \n\t"
427     "b                 3b                                  \n\t"
428     " addiu            %[after],   %[after],     -1        \n\t"
429    "4:                                                     \n\t"
430     ".set              pop                                 \n\t"
431     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
432       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
433       [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8),
434       [iters] "=&r" (iters), [after] "=&r" (after),
435       [window] "+r" (window),[anaBuf] "+r" (anaBuf),
436       [outBuf] "+r" (outBuf)
437     : [anaLen] "r" (anaLen)
438     : "memory", "hi", "lo"
439   );
440 #else
      // Plain MIPS32 variant: out[i] = (window[i] * anaBuf[i] + 0x2000) >> 14.
441   __asm  __volatile(
442     ".set           push                                    \n\t"
443     ".set           noreorder                               \n\t"
        // Main loop: iters = anaLen >> 2 passes, 4 samples per pass, using
        // halfword loads and stores.
444     "sra            %[iters],   %[anaLen],      2           \n\t"
445    "1:                                                      \n\t"
446     "blez           %[iters],   2f                          \n\t"
447     " nop                                                   \n\t"
448     "lh             %[r0],      0(%[window])                \n\t"
449     "lh             %[r1],      0(%[anaBuf])                \n\t"
450     "lh             %[r2],      2(%[window])                \n\t"
451     "lh             %[r3],      2(%[anaBuf])                \n\t"
452     "lh             %[r4],      4(%[window])                \n\t"
453     "lh             %[r5],      4(%[anaBuf])                \n\t"
454     "lh             %[r6],      6(%[window])                \n\t"
455     "lh             %[r7],      6(%[anaBuf])                \n\t"
456     "mul            %[r0],      %[r0],          %[r1]       \n\t"
457     "mul            %[r2],      %[r2],          %[r3]       \n\t"
458     "mul            %[r4],      %[r4],          %[r5]       \n\t"
459     "mul            %[r6],      %[r6],          %[r7]       \n\t"
460     "addiu          %[window],  %[window],      8           \n\t"
461     "addiu          %[anaBuf],  %[anaBuf],      8           \n\t"
        // Add half of 1 << 14 before the shift, i.e. round to nearest.
462     "addiu          %[r0],      %[r0],          0x2000      \n\t"
463     "addiu          %[r2],      %[r2],          0x2000      \n\t"
464     "addiu          %[r4],      %[r4],          0x2000      \n\t"
465     "addiu          %[r6],      %[r6],          0x2000      \n\t"
466     "sra            %[r0],      %[r0],          14          \n\t"
467     "sra            %[r2],      %[r2],          14          \n\t"
468     "sra            %[r4],      %[r4],          14          \n\t"
469     "sra            %[r6],      %[r6],          14          \n\t"
470     "sh             %[r0],      0(%[outBuf])                \n\t"
471     "sh             %[r2],      2(%[outBuf])                \n\t"
472     "sh             %[r4],      4(%[outBuf])                \n\t"
473     "sh             %[r6],      6(%[outBuf])                \n\t"
474     "addiu          %[outBuf],  %[outBuf],      8           \n\t"
475     "b              1b                                      \n\t"
476     " addiu         %[iters],   %[iters],       -1          \n\t"
477    "2:                                                      \n\t"
        // Tail loop: the remaining (anaLen & 3) samples, one at a time.
478     "andi           %[after],   %[anaLen],      3           \n\t"
479    "3:                                                      \n\t"
480     "blez           %[after],   4f                          \n\t"
481     " nop                                                   \n\t"
482     "lh             %[r0],      0(%[window])                \n\t"
483     "lh             %[r1],      0(%[anaBuf])                \n\t"
484     "mul            %[r0],      %[r0],          %[r1]       \n\t"
485     "addiu          %[window],  %[window],      2           \n\t"
486     "addiu          %[anaBuf],  %[anaBuf],      2           \n\t"
487     "addiu          %[outBuf],  %[outBuf],      2           \n\t"
488     "addiu          %[r0],      %[r0],          0x2000      \n\t"
489     "sra            %[r0],      %[r0],          14          \n\t"
490     "sh             %[r0],      -2(%[outBuf])               \n\t"
491     "b              3b                                      \n\t"
492     " addiu         %[after],   %[after],       -1          \n\t"
493    "4:                                                      \n\t"
494     ".set           pop                                     \n\t"
495     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
496       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
497       [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters),
498       [after] "=&r" (after), [window] "+r" (window),
499       [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf)
500     : [anaLen] "r" (anaLen)
501     : "memory", "hi", "lo"
502   );
503 #endif
504 }
505
506 // For the noise suppression process, synthesis, read out fully processed
507 // segment, and update synthesis buffer.
508 void WebRtcNsx_SynthesisUpdate_mips(NsxInst_t* inst,
509                                     int16_t* out_frame,
510                                     int16_t gain_factor) {
511
512   int iters = inst->blockLen10ms >> 2;
513   int after = inst->blockLen10ms & 3;
514   int r0, r1, r2, r3, r4, r5, r6, r7;
515   int16_t *window = (int16_t*)inst->window;
516   int16_t *real = inst->real;
517   int16_t *synthBuf = inst->synthesisBuffer;
518   int16_t *out = out_frame;
519   int sat_pos = 0x7fff;
520   int sat_neg = 0xffff8000;
521   int block10 = (int)inst->blockLen10ms;
522   int anaLen = (int)inst->anaLen;
523
524   __asm __volatile(
525     ".set       push                                        \n\t"
526     ".set       noreorder                                   \n\t"
527    "1:                                                      \n\t"
528     "blez       %[iters],   2f                              \n\t"
529     " nop                                                   \n\t"
530     "lh         %[r0],      0(%[window])                    \n\t"
531     "lh         %[r1],      0(%[real])                      \n\t"
532     "lh         %[r2],      2(%[window])                    \n\t"
533     "lh         %[r3],      2(%[real])                      \n\t"
534     "lh         %[r4],      4(%[window])                    \n\t"
535     "lh         %[r5],      4(%[real])                      \n\t"
536     "lh         %[r6],      6(%[window])                    \n\t"
537     "lh         %[r7],      6(%[real])                      \n\t"
538     "mul        %[r0],      %[r0],          %[r1]           \n\t"
539     "mul        %[r2],      %[r2],          %[r3]           \n\t"
540     "mul        %[r4],      %[r4],          %[r5]           \n\t"
541     "mul        %[r6],      %[r6],          %[r7]           \n\t"
542     "addiu      %[r0],      %[r0],          0x2000          \n\t"
543     "addiu      %[r2],      %[r2],          0x2000          \n\t"
544     "addiu      %[r4],      %[r4],          0x2000          \n\t"
545     "addiu      %[r6],      %[r6],          0x2000          \n\t"
546     "sra        %[r0],      %[r0],          14              \n\t"
547     "sra        %[r2],      %[r2],          14              \n\t"
548     "sra        %[r4],      %[r4],          14              \n\t"
549     "sra        %[r6],      %[r6],          14              \n\t"
550     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
551     "mul        %[r2],      %[r2],          %[gain_factor]  \n\t"
552     "mul        %[r4],      %[r4],          %[gain_factor]  \n\t"
553     "mul        %[r6],      %[r6],          %[gain_factor]  \n\t"
554     "addiu      %[r0],      %[r0],          0x1000          \n\t"
555     "addiu      %[r2],      %[r2],          0x1000          \n\t"
556     "addiu      %[r4],      %[r4],          0x1000          \n\t"
557     "addiu      %[r6],      %[r6],          0x1000          \n\t"
558     "sra        %[r0],      %[r0],          13              \n\t"
559     "sra        %[r2],      %[r2],          13              \n\t"
560     "sra        %[r4],      %[r4],          13              \n\t"
561     "sra        %[r6],      %[r6],          13              \n\t"
562     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
563     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
564     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
565     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
566     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
567     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
568     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
569     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
570     "lh         %[r1],      0(%[synthBuf])                  \n\t"
571     "lh         %[r3],      2(%[synthBuf])                  \n\t"
572     "lh         %[r5],      4(%[synthBuf])                  \n\t"
573     "lh         %[r7],      6(%[synthBuf])                  \n\t"
574     "addu       %[r0],      %[r0],          %[r1]           \n\t"
575     "addu       %[r2],      %[r2],          %[r3]           \n\t"
576     "addu       %[r4],      %[r4],          %[r5]           \n\t"
577     "addu       %[r6],      %[r6],          %[r7]           \n\t"
578     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
579     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
580     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
581     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
582     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
583     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
584     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
585     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
586     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
587     "slt        %[r3],      %[r2],          %[sat_neg]      \n\t"
588     "slt        %[r5],      %[r4],          %[sat_neg]      \n\t"
589     "slt        %[r7],      %[r6],          %[sat_neg]      \n\t"
590     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
591     "movn       %[r2],      %[sat_neg],     %[r3]           \n\t"
592     "movn       %[r4],      %[sat_neg],     %[r5]           \n\t"
593     "movn       %[r6],      %[sat_neg],     %[r7]           \n\t"
594     "sh         %[r0],      0(%[synthBuf])                  \n\t"
595     "sh         %[r2],      2(%[synthBuf])                  \n\t"
596     "sh         %[r4],      4(%[synthBuf])                  \n\t"
597     "sh         %[r6],      6(%[synthBuf])                  \n\t"
598     "sh         %[r0],      0(%[out])                       \n\t"
599     "sh         %[r2],      2(%[out])                       \n\t"
600     "sh         %[r4],      4(%[out])                       \n\t"
601     "sh         %[r6],      6(%[out])                       \n\t"
602     "addiu      %[window],  %[window],      8               \n\t"
603     "addiu      %[real],    %[real],        8               \n\t"
604     "addiu      %[synthBuf],%[synthBuf],    8               \n\t"
605     "addiu      %[out],     %[out],         8               \n\t"
606     "b          1b                                          \n\t"
607     " addiu     %[iters],   %[iters],       -1              \n\t"
608    "2:                                                      \n\t"
609     "blez       %[after],   3f                              \n\t"
610     " subu      %[block10], %[anaLen],      %[block10]      \n\t"
611     "lh         %[r0],      0(%[window])                    \n\t"
612     "lh         %[r1],      0(%[real])                      \n\t"
613     "mul        %[r0],      %[r0],          %[r1]           \n\t"
614     "addiu      %[window],  %[window],      2               \n\t"
615     "addiu      %[real],    %[real],        2               \n\t"
616     "addiu      %[r0],      %[r0],          0x2000          \n\t"
617     "sra        %[r0],      %[r0],          14              \n\t"
618     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
619     "addiu      %[r0],      %[r0],          0x1000          \n\t"
620     "sra        %[r0],      %[r0],          13              \n\t"
621     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
622     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
623     "lh         %[r1],      0(%[synthBuf])                  \n\t"
624     "addu       %[r0],      %[r0],          %[r1]           \n\t"
625     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
626     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
627     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
628     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
629     "sh         %[r0],      0(%[synthBuf])                  \n\t"
630     "sh         %[r0],      0(%[out])                       \n\t"
631     "addiu      %[synthBuf],%[synthBuf],    2               \n\t"
632     "addiu      %[out],     %[out],         2               \n\t"
633     "b          2b                                          \n\t"
634     " addiu     %[after],   %[after],       -1              \n\t"
635    "3:                                                      \n\t"
636     "sra        %[iters],   %[block10],     2               \n\t"
637    "4:                                                      \n\t"
638     "blez       %[iters],   5f                              \n\t"
639     " andi      %[after],   %[block10],     3               \n\t"
640     "lh         %[r0],      0(%[window])                    \n\t"
641     "lh         %[r1],      0(%[real])                      \n\t"
642     "lh         %[r2],      2(%[window])                    \n\t"
643     "lh         %[r3],      2(%[real])                      \n\t"
644     "lh         %[r4],      4(%[window])                    \n\t"
645     "lh         %[r5],      4(%[real])                      \n\t"
646     "lh         %[r6],      6(%[window])                    \n\t"
647     "lh         %[r7],      6(%[real])                      \n\t"
648     "mul        %[r0],      %[r0],          %[r1]           \n\t"
649     "mul        %[r2],      %[r2],          %[r3]           \n\t"
650     "mul        %[r4],      %[r4],          %[r5]           \n\t"
651     "mul        %[r6],      %[r6],          %[r7]           \n\t"
652     "addiu      %[r0],      %[r0],          0x2000          \n\t"
653     "addiu      %[r2],      %[r2],          0x2000          \n\t"
654     "addiu      %[r4],      %[r4],          0x2000          \n\t"
655     "addiu      %[r6],      %[r6],          0x2000          \n\t"
656     "sra        %[r0],      %[r0],          14              \n\t"
657     "sra        %[r2],      %[r2],          14              \n\t"
658     "sra        %[r4],      %[r4],          14              \n\t"
659     "sra        %[r6],      %[r6],          14              \n\t"
660     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
661     "mul        %[r2],      %[r2],          %[gain_factor]  \n\t"
662     "mul        %[r4],      %[r4],          %[gain_factor]  \n\t"
663     "mul        %[r6],      %[r6],          %[gain_factor]  \n\t"
664     "addiu      %[r0],      %[r0],          0x1000          \n\t"
665     "addiu      %[r2],      %[r2],          0x1000          \n\t"
666     "addiu      %[r4],      %[r4],          0x1000          \n\t"
667     "addiu      %[r6],      %[r6],          0x1000          \n\t"
668     "sra        %[r0],      %[r0],          13              \n\t"
669     "sra        %[r2],      %[r2],          13              \n\t"
670     "sra        %[r4],      %[r4],          13              \n\t"
671     "sra        %[r6],      %[r6],          13              \n\t"
672     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
673     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
674     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
675     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
676     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
677     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
678     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
679     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
680     "lh         %[r1],      0(%[synthBuf])                  \n\t"
681     "lh         %[r3],      2(%[synthBuf])                  \n\t"
682     "lh         %[r5],      4(%[synthBuf])                  \n\t"
683     "lh         %[r7],      6(%[synthBuf])                  \n\t"
684     "addu       %[r0],      %[r0],          %[r1]           \n\t"
685     "addu       %[r2],      %[r2],          %[r3]           \n\t"
686     "addu       %[r4],      %[r4],          %[r5]           \n\t"
687     "addu       %[r6],      %[r6],          %[r7]           \n\t"
688     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
689     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
690     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
691     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
692     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
693     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
694     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
695     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
696     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
697     "slt        %[r3],      %[r2],          %[sat_neg]      \n\t"
698     "slt        %[r5],      %[r4],          %[sat_neg]      \n\t"
699     "slt        %[r7],      %[r6],          %[sat_neg]      \n\t"
700     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
701     "movn       %[r2],      %[sat_neg],     %[r3]           \n\t"
702     "movn       %[r4],      %[sat_neg],     %[r5]           \n\t"
703     "movn       %[r6],      %[sat_neg],     %[r7]           \n\t"
704     "sh         %[r0],      0(%[synthBuf])                  \n\t"
705     "sh         %[r2],      2(%[synthBuf])                  \n\t"
706     "sh         %[r4],      4(%[synthBuf])                  \n\t"
707     "sh         %[r6],      6(%[synthBuf])                  \n\t"
708     "addiu      %[window],  %[window],      8               \n\t"
709     "addiu      %[real],    %[real],        8               \n\t"
710     "addiu      %[synthBuf],%[synthBuf],    8               \n\t"
711     "b          4b                                          \n\t"
712     " addiu     %[iters],   %[iters],       -1              \n\t"
713    "5:                                                      \n\t"
714     "blez       %[after],   6f                              \n\t"
715     " nop                                                   \n\t"
716     "lh         %[r0],      0(%[window])                    \n\t"
717     "lh         %[r1],      0(%[real])                      \n\t"
718     "mul        %[r0],      %[r0],          %[r1]           \n\t"
719     "addiu      %[window],  %[window],      2               \n\t"
720     "addiu      %[real],    %[real],        2               \n\t"
721     "addiu      %[r0],      %[r0],          0x2000          \n\t"
722     "sra        %[r0],      %[r0],          14              \n\t"
723     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
724     "addiu      %[r0],      %[r0],          0x1000          \n\t"
725     "sra        %[r0],      %[r0],          13              \n\t"
726     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
727     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
728     "lh         %[r1],      0(%[synthBuf])                  \n\t"
729     "addu       %[r0],      %[r0],          %[r1]           \n\t"
730     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
731     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
732     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
733     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
734     "sh         %[r0],      0(%[synthBuf])                  \n\t"
735     "addiu      %[synthBuf],%[synthBuf],    2               \n\t"
736     "b          2b                                          \n\t"
737     " addiu     %[after],   %[after],       -1              \n\t"
738    "6:                                                      \n\t"
739     ".set       pop                                         \n\t"
740     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
741       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
742       [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters),
743       [after] "+r" (after), [block10] "+r" (block10),
744       [window] "+r" (window), [real] "+r" (real),
745       [synthBuf] "+r" (synthBuf), [out] "+r" (out)
746     : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos),
747       [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen)
748     : "memory", "hi", "lo"
749   );
750
751   // update synthesis buffer
752   WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer,
753                         inst->synthesisBuffer + inst->blockLen10ms,
754                         inst->anaLen - inst->blockLen10ms);
755   WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
756       + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
757 }
758
759 // Filter the data in the frequency domain, and create spectrum.
760 void WebRtcNsx_PrepareSpectrum_mips(NsxInst_t* inst, int16_t* freq_buf) {
761
762   uint16_t *noiseSupFilter = inst->noiseSupFilter;
763   int16_t *real = inst->real;
764   int16_t *imag = inst->imag;
765   int32_t loop_count = 2;
766   int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6;
767   int16_t tmp16 = (inst->anaLen << 1) - 4;
768   int16_t* freq_buf_f = freq_buf;
769   int16_t* freq_buf_s = &freq_buf[tmp16];
770
771   __asm __volatile (
772     ".set       push                                                 \n\t"
773     ".set       noreorder                                            \n\t"
774     //first sample
775     "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
776     "lh         %[tmp_2],           0(%[real])                       \n\t"
777     "lh         %[tmp_3],           0(%[imag])                       \n\t"
778     "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
779     "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
780     "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
781     "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
782     "sh         %[tmp_2],           0(%[real])                       \n\t"
783     "sh         %[tmp_3],           0(%[imag])                       \n\t"
784     "negu       %[tmp_3],           %[tmp_3]                         \n\t"
785     "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
786     "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
787     "addiu      %[real],            %[real],              2          \n\t"
788     "addiu      %[imag],            %[imag],              2          \n\t"
789     "addiu      %[noiseSupFilter],  %[noiseSupFilter],    2          \n\t"
790     "addiu      %[freq_buf_f],      %[freq_buf_f],        4          \n\t"
791    "1:                                                               \n\t"
792     "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
793     "lh         %[tmp_2],           0(%[real])                       \n\t"
794     "lh         %[tmp_3],           0(%[imag])                       \n\t"
795     "lh         %[tmp_4],           2(%[noiseSupFilter])             \n\t"
796     "lh         %[tmp_5],           2(%[real])                       \n\t"
797     "lh         %[tmp_6],           2(%[imag])                       \n\t"
798     "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
799     "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
800     "mul        %[tmp_5],           %[tmp_5],             %[tmp_4]   \n\t"
801     "mul        %[tmp_6],           %[tmp_6],             %[tmp_4]   \n\t"
802     "addiu      %[loop_count],      %[loop_count],        2          \n\t"
803     "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
804     "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
805     "sra        %[tmp_5],           %[tmp_5],             14         \n\t"
806     "sra        %[tmp_6],           %[tmp_6],             14         \n\t"
807     "addiu      %[noiseSupFilter],  %[noiseSupFilter],    4          \n\t"
808     "sh         %[tmp_2],           0(%[real])                       \n\t"
809     "sh         %[tmp_2],           4(%[freq_buf_s])                 \n\t"
810     "sh         %[tmp_3],           0(%[imag])                       \n\t"
811     "sh         %[tmp_3],           6(%[freq_buf_s])                 \n\t"
812     "negu       %[tmp_3],           %[tmp_3]                         \n\t"
813     "sh         %[tmp_5],           2(%[real])                       \n\t"
814     "sh         %[tmp_5],           0(%[freq_buf_s])                 \n\t"
815     "sh         %[tmp_6],           2(%[imag])                       \n\t"
816     "sh         %[tmp_6],           2(%[freq_buf_s])                 \n\t"
817     "negu       %[tmp_6],           %[tmp_6]                         \n\t"
818     "addiu      %[freq_buf_s],      %[freq_buf_s],        -8         \n\t"
819     "addiu      %[real],            %[real],              4          \n\t"
820     "addiu      %[imag],            %[imag],              4          \n\t"
821     "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
822     "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
823     "sh         %[tmp_5],           4(%[freq_buf_f])                 \n\t"
824     "sh         %[tmp_6],           6(%[freq_buf_f])                 \n\t"
825     "blt        %[loop_count],      %[loop_size],         1b         \n\t"
826     " addiu     %[freq_buf_f],      %[freq_buf_f],        8          \n\t"
827     //last two samples:
828     "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
829     "lh         %[tmp_2],           0(%[real])                       \n\t"
830     "lh         %[tmp_3],           0(%[imag])                       \n\t"
831     "lh         %[tmp_4],           2(%[noiseSupFilter])             \n\t"
832     "lh         %[tmp_5],           2(%[real])                       \n\t"
833     "lh         %[tmp_6],           2(%[imag])                       \n\t"
834     "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
835     "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
836     "mul        %[tmp_5],           %[tmp_5],             %[tmp_4]   \n\t"
837     "mul        %[tmp_6],           %[tmp_6],             %[tmp_4]   \n\t"
838     "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
839     "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
840     "sra        %[tmp_5],           %[tmp_5],             14         \n\t"
841     "sra        %[tmp_6],           %[tmp_6],             14         \n\t"
842     "sh         %[tmp_2],           0(%[real])                       \n\t"
843     "sh         %[tmp_2],           4(%[freq_buf_s])                 \n\t"
844     "sh         %[tmp_3],           0(%[imag])                       \n\t"
845     "sh         %[tmp_3],           6(%[freq_buf_s])                 \n\t"
846     "negu       %[tmp_3],           %[tmp_3]                         \n\t"
847     "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
848     "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
849     "sh         %[tmp_5],           4(%[freq_buf_f])                 \n\t"
850     "sh         %[tmp_6],           6(%[freq_buf_f])                 \n\t"
851     "sh         %[tmp_5],           2(%[real])                       \n\t"
852     "sh         %[tmp_6],           2(%[imag])                       \n\t"
853     ".set       pop                                                  \n\t"
854     : [real] "+r" (real), [imag] "+r" (imag),
855       [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s),
856       [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter),
857       [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3),
858       [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6)
859     : [loop_size] "r" (inst->anaLen2)
860     : "memory", "hi", "lo"
861   );
862 }
863
#if defined(MIPS_DSP_R1_LE)
// Denormalize the real-valued signal |in|, the output from inverse FFT.
//
// Writes inst->real[i] for i in [0, inst->anaLen) from in[i] shifted by
// shift = factor - inst->normData:
//   * shift >= 0: left shift using the DSP instruction shllv_s.ph
//     (saturating halfword shift); only the low halfword is stored.
//   * shift <  0: arithmetic right shift by -shift using srav.
// Elements are processed four at a time, then a 0..3 element remainder.
//
// NOTE(review): the halfword shift instruction is expected to use only the
// low bits of |shift|; presumably callers keep |shift| below 16 - confirm.
void WebRtcNsx_Denormalize_mips(NsxInst_t* inst, int16_t* in, int factor) {
  int32_t r0, r1, r2, r3, t0;
  int len = inst->anaLen;
  int16_t *out = &inst->real[0];
  int shift = factor - inst->normData;

  __asm __volatile (
    ".set          push                                \n\t"
    ".set          noreorder                           \n\t"
    // Nothing to do for an empty buffer.
    "beqz          %[len],     8f                      \n\t"
    " nop                                              \n\t"
    // Negative shift -> right-shift path at label 4. The delay slot
    // computes the number of 4-element groups for both paths.
    "bltz          %[shift],   4f                      \n\t"
    " sra          %[t0],      %[len],      2          \n\t"
    "beqz          %[t0],      2f                      \n\t"
    " andi         %[len],     %[len],      3          \n\t"
    // Left shift, four elements per pass.
   "1:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "lh            %[r1],      2(%[in])                \n\t"
    "lh            %[r2],      4(%[in])                \n\t"
    "lh            %[r3],      6(%[in])                \n\t"
    "shllv_s.ph    %[r0],      %[r0],       %[shift]   \n\t"
    "shllv_s.ph    %[r1],      %[r1],       %[shift]   \n\t"
    "shllv_s.ph    %[r2],      %[r2],       %[shift]   \n\t"
    "shllv_s.ph    %[r3],      %[r3],       %[shift]   \n\t"
    "addiu         %[in],      %[in],       8          \n\t"
    "addiu         %[t0],      %[t0],       -1         \n\t"
    "sh            %[r0],      0(%[out])               \n\t"
    "sh            %[r1],      2(%[out])               \n\t"
    "sh            %[r2],      4(%[out])               \n\t"
    "sh            %[r3],      6(%[out])               \n\t"
    "bgtz          %[t0],      1b                      \n\t"
    " addiu        %[out],     %[out],      8          \n\t"
    // Left shift, remaining 0..3 elements.
   "2:                                                 \n\t"
    "beqz          %[len],     8f                      \n\t"
    " nop                                              \n\t"
   "3:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "addiu         %[in],      %[in],       2          \n\t"
    "addiu         %[len],     %[len],      -1         \n\t"
    "shllv_s.ph    %[r0],      %[r0],       %[shift]   \n\t"
    "addiu         %[out],     %[out],      2          \n\t"
    "bgtz          %[len],     3b                      \n\t"
    " sh           %[r0],      -2(%[out])              \n\t"
    "b             8f                                  \n\t"
    // Explicit delay slot so the negu below is not executed on exit.
    " nop                                              \n\t"
    // Right-shift path: work with the magnitude of the shift.
   "4:                                                 \n\t"
    "negu          %[shift],   %[shift]                \n\t"
    "beqz          %[t0],      6f                      \n\t"
    " andi         %[len],     %[len],      3          \n\t"
    // Right shift, four elements per pass.
   "5:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "lh            %[r1],      2(%[in])                \n\t"
    "lh            %[r2],      4(%[in])                \n\t"
    "lh            %[r3],      6(%[in])                \n\t"
    "srav          %[r0],      %[r0],       %[shift]   \n\t"
    "srav          %[r1],      %[r1],       %[shift]   \n\t"
    "srav          %[r2],      %[r2],       %[shift]   \n\t"
    "srav          %[r3],      %[r3],       %[shift]   \n\t"
    "addiu         %[in],      %[in],       8          \n\t"
    "addiu         %[t0],      %[t0],       -1         \n\t"
    "sh            %[r0],      0(%[out])               \n\t"
    "sh            %[r1],      2(%[out])               \n\t"
    "sh            %[r2],      4(%[out])               \n\t"
    "sh            %[r3],      6(%[out])               \n\t"
    "bgtz          %[t0],      5b                      \n\t"
    " addiu        %[out],     %[out],      8          \n\t"
    // Right shift, remaining 0..3 elements.
   "6:                                                 \n\t"
    "beqz          %[len],     8f                      \n\t"
    " nop                                              \n\t"
   "7:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "addiu         %[in],      %[in],       2          \n\t"
    "addiu         %[len],     %[len],      -1         \n\t"
    "srav          %[r0],      %[r0],       %[shift]   \n\t"
    "addiu         %[out],     %[out],      2          \n\t"
    "bgtz          %[len],     7b                      \n\t"
    " sh           %[r0],      -2(%[out])              \n\t"
   "8:                                                 \n\t"
    ".set          pop                                 \n\t"
    // len, shift, in and out are all written by the code above, so they
    // must be read-write operands; input-only operands may not be modified.
    : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
      [r2] "=&r" (r2), [r3] "=&r" (r3),
      [len] "+r" (len), [shift] "+r" (shift), [in] "+r" (in),
      [out] "+r" (out)
    :
    : "memory"
  );
}
#endif
952
953 // Normalize the real-valued signal |in|, the input to forward FFT.
954 void WebRtcNsx_NormalizeRealBuffer_mips(NsxInst_t* inst,
955                                         const int16_t* in,
956                                         int16_t* out) {
957   int32_t r0, r1, r2, r3, t0;
958   int len = inst->anaLen;
959   int shift = inst->normData;
960
961   __asm __volatile (
962     ".set          push                                \n\t"
963     ".set          noreorder                           \n\t"
964     "beqz          %[len],     4f                      \n\t"
965     " sra          %[t0],      %[len],      2          \n\t"
966     "beqz          %[t0],      2f                      \n\t"
967     " andi         %[len],     %[len],      3          \n\t"
968    "1:                                                 \n\t"
969     "lh            %[r0],      0(%[in])                \n\t"
970     "lh            %[r1],      2(%[in])                \n\t"
971     "lh            %[r2],      4(%[in])                \n\t"
972     "lh            %[r3],      6(%[in])                \n\t"
973     "sllv          %[r0],      %[r0],       %[shift]   \n\t"
974     "sllv          %[r1],      %[r1],       %[shift]   \n\t"
975     "sllv          %[r2],      %[r2],       %[shift]   \n\t"
976     "sllv          %[r3],      %[r3],       %[shift]   \n\t"
977     "addiu         %[in],      %[in],       8          \n\t"
978     "addiu         %[t0],      %[t0],       -1         \n\t"
979     "sh            %[r0],      0(%[out])               \n\t"
980     "sh            %[r1],      2(%[out])               \n\t"
981     "sh            %[r2],      4(%[out])               \n\t"
982     "sh            %[r3],      6(%[out])               \n\t"
983     "bgtz          %[t0],      1b                      \n\t"
984     " addiu        %[out],     %[out],      8          \n\t"
985    "2:                                                 \n\t"
986     "beqz          %[len],     4f                      \n\t"
987     " nop                                              \n\t"
988    "3:                                                 \n\t"
989     "lh            %[r0],      0(%[in])                \n\t"
990     "addiu         %[in],      %[in],       2          \n\t"
991     "addiu         %[len],     %[len],      -1         \n\t"
992     "sllv          %[r0],      %[r0],       %[shift]   \n\t"
993     "addiu         %[out],     %[out],      2          \n\t"
994     "bgtz          %[len],     3b                      \n\t"
995     " sh           %[r0],      -2(%[out])              \n\t"
996    "4:                                                 \n\t"
997     ".set          pop                                 \n\t"
998     : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
999       [r2] "=&r" (r2), [r3] "=&r" (r3)
1000     : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
1001       [out] "r" (out)
1002     : "memory"
1003   );
1004 }
1005