ccb0c37632466ed3dc8a11b0977ebedafb533de6
[platform/framework/web/crosswalk.git] / src / third_party / webrtc / modules / audio_processing / ns / nsx_core_mips.c
1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11 #include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
12 #include "webrtc/modules/audio_processing/ns/nsx_core.h"
13
// Lookup table used by the indicator ("sigmoid map") computations in
// WebRtcNsx_SpeechNoiseProb. It is indexed by the integer part of a Q14
// argument (argument >> 14) and linearly interpolated with the low 14 bits
// of that argument. Entry [index + 1] is read for every index < 16, hence
// the 17 entries. The interpolated value is added to or subtracted from
// 8192, i.e. half of the Q14 value 16384 used for 1.0.
// NOTE(review): the values look like samples of 0.5 * tanh(.) in Q14 --
// confirm against the generic (non-MIPS) implementation.
14 static const int16_t kIndicatorTable[17] = {
15   0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
16   7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
17 };
18
19 // Compute the per-frequency non-speech probability (MIPS assembly version).
20 // The result is written to nonSpeechProbFinal, one value per frequency bin.
21 // priorLocSnr is the prior SNR for each frequency (in Q11)
22 // postLocSnr is the post SNR for each frequency (in Q11)
   //
   // Side effects visible in this function: inst->logLrtTimeAvgW32[] is
   // updated for every bin, and inst->featureLogLrt and
   // inst->priorNonSpeechProb are rewritten.
   // nonSpeechProbFinal[0 .. inst->magnLen - 1] is first cleared to zero; a bin
   // keeps that zero when inst->priorNonSpeechProb <= 0 or when its
   // logLrtTimeAvgW32 value is >= 65300.
   // NOTE(review): the stored value is (prior << 8) / (prior + inv) with prior
   // in Q14, so it is presumably Q8 -- confirm against the callers.
23 void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
24                                uint16_t* nonSpeechProbFinal,
25                                uint32_t* priorLocSnr,
26                                uint32_t* postLocSnr) {
27
28   uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
29   int32_t indPriorFX, tmp32no1;
30   int32_t logLrtTimeAvgKsumFX;
31   int16_t indPriorFX16;
32   int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac;
33   int i, normTmp, nShifts;
34
     // Scratch registers and constant operands for the inline assembly.
     // Note that const_11rsh12 holds 11 << 12 (a left shift, despite its name).
35   int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
36   int32_t const_max = 0x7fffffff;
37   int32_t const_neg43 = -43;
38   int32_t const_5412 = 5412;
39   int32_t const_11rsh12 = (11 << 12);
40   int32_t const_178 = 178;
41
42
43   // compute feature based on average LR factor
44   // this is the average over all frequencies of the smooth log LRT
45   logLrtTimeAvgKsumFX = 0;
46   for (i = 0; i < inst->magnLen; i++) {
47     r0 = postLocSnr[i]; // Q11
48     r1 = priorLocSnr[i];
49     r2 = inst->logLrtTimeAvgW32[i];
50
       // The assembly below computes, with z0 = clz(r0) and z1 = clz(r1)
       // (each replaced by 0 when the count is 32, i.e. for a zero input):
       //   den  = (z0 < 11) ? (r1 >> (11 - z0)) : (r1 << (z0 - 11));
       //   num  = r0 << z0;
       //   r0  -= (den >= 1) ? (num / den) : num;        // unsigned divide
       //   frac = ((r1 << z1) & 0x7fffffff) >> 19;
       //   lg   = ((frac * frac * -43) >> 19) + ((frac * 5412) >> 12) + 37;
       //   lg   = ((31 - z1) << 12) + lg - (11 << 12);
       //   lg   = (lg * 178) >> 8;
       //   r2   = r2 - ((lg + r2) >> 1) + r0;
       // Right shifts are arithmetic (sra/srav) and "den >= 1" is a signed
       // compare (slti). The instructions are interleaved by hand under
       // .set noreorder, so their order must not be changed.
       // NOTE(review): lg presumably approximates log2(priorLocSnr[i]) in Q12
       // scaled by ln(2) (178 / 256) -- confirm against the generic C version.
       // NOTE(review): the three-operand divu is an assembler macro; check
       // whether the toolchain expands it with a divide-by-zero trap, since
       // den == 0 is only filtered after the division has been issued.
51     __asm __volatile(
52       ".set       push                                    \n\t"
53       ".set       noreorder                               \n\t"
54       "clz        %[r3],    %[r0]                         \n\t"
55       "clz        %[r5],    %[r1]                         \n\t"
56       "slti       %[r4],    %[r3],    32                  \n\t"
57       "slti       %[r6],    %[r5],    32                  \n\t"
58       "movz       %[r3],    $0,       %[r4]               \n\t"
59       "movz       %[r5],    $0,       %[r6]               \n\t"
60       "slti       %[r4],    %[r3],    11                  \n\t"
61       "addiu      %[r6],    %[r3],    -11                 \n\t"
62       "neg        %[r7],    %[r6]                         \n\t"
63       "sllv       %[r6],    %[r1],    %[r6]               \n\t"
64       "srav       %[r7],    %[r1],    %[r7]               \n\t"
65       "movn       %[r6],    %[r7],    %[r4]               \n\t"
66       "sllv       %[r1],    %[r1],    %[r5]               \n\t"
67       "and        %[r1],    %[r1],    %[const_max]        \n\t"
68       "sra        %[r1],    %[r1],    19                  \n\t"
69       "mul        %[r7],    %[r1],    %[r1]               \n\t"
70       "sllv       %[r3],    %[r0],    %[r3]               \n\t"
71       "divu       %[r8],    %[r3],    %[r6]               \n\t"
72       "slti       %[r6],    %[r6],    1                   \n\t"
73       "mul        %[r7],    %[r7],    %[const_neg43]      \n\t"
74       "sra        %[r7],    %[r7],    19                  \n\t"
75       "movz       %[r3],    %[r8],    %[r6]               \n\t"
76       "subu       %[r0],    %[r0],    %[r3]               \n\t"
77       "mul        %[r1],    %[r1],    %[const_5412]       \n\t"
78       "sra        %[r1],    %[r1],    12                  \n\t"
79       "addu       %[r7],    %[r7],    %[r1]               \n\t"
80       "addiu      %[r1],    %[r7],    37                  \n\t"
81       "addiu      %[r5],    %[r5],    -31                 \n\t"
82       "neg        %[r5],    %[r5]                         \n\t"
83       "sll        %[r5],    %[r5],    12                  \n\t"
84       "addu       %[r5],    %[r5],    %[r1]               \n\t"
85       "subu       %[r7],    %[r5],    %[const_11rsh12]    \n\t"
86       "mul        %[r7],    %[r7],    %[const_178]        \n\t"
87       "sra        %[r7],    %[r7],    8                   \n\t"
88       "addu       %[r7],    %[r7],    %[r2]               \n\t"
89       "sra        %[r7],    %[r7],    1                   \n\t"
90       "subu       %[r2],    %[r2],    %[r7]               \n\t"
91       "addu       %[r2],    %[r2],    %[r0]               \n\t"
92       ".set       pop                                     \n\t"
93       : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
94         [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
95         [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8)
96       : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43),
97         [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12),
98         [const_178] "r" (const_178)
99       : "hi", "lo"
100     );
        // Store the updated smoothed value and accumulate it over all bins.
101     inst->logLrtTimeAvgW32[i] = r2;
102     logLrtTimeAvgKsumFX += r2;
103   }
104
105   inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5,
106                                               inst->stages + 10);
107                                                   // 5 = BIN_SIZE_LRT / 2
108   // done with computation of LR factor
109
110   //
111   // compute the indicator functions
112   //
113
114   // average LRT feature
115   // FLOAT code
116   // indicator0 = 0.5 * (tanh(widthPrior *
117   //                      (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
      // tmpIndFX doubles as a sign flag here: it stays 16384 when the
      // difference is non-negative and is set to 0 when it is negative. It is
      // only replaced by 8192 +/- (interpolated table value) when the table
      // index is in range; otherwise the 16384 / 0 value is used as is.
118   tmpIndFX = 16384; // Q14(1.0)
119   tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
120   nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
121   //use larger width in tanh map for pause regions
122   if (tmp32no1 < 0) {
123     tmpIndFX = 0;
124     tmp32no1 = -tmp32no1;
125     //widthPrior = widthPrior * 2.0;
126     nShifts++;
127   }
128   tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
129   // compute indicator function: sigmoid map
130   tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
131   if ((tableIndex < 16) && (tableIndex >= 0)) {
        // Linear interpolation between table entries using the low 14 bits.
132     tmp16no2 = kIndicatorTable[tableIndex];
133     tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
134     frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
135     tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
136     if (tmpIndFX == 0) {
137       tmpIndFX = 8192 - tmp16no2; // Q14
138     } else {
139       tmpIndFX = 8192 + tmp16no2; // Q14
140     }
141   }
142   indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14
143
144   //spectral flatness feature
145   if (inst->weightSpecFlat) {
146     tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
147     tmpIndFX = 16384; // Q14(1.0)
148     //use larger width in tanh map for pause regions
149     tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
150     nShifts = 4;
151     if (inst->thresholdSpecFlat < tmpU32no1) {
152       tmpIndFX = 0;
153       tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
154       //widthPrior = widthPrior * 2.0;
155       nShifts++;
156     }
        // NOTE(review): tmp32no1 assigned here is not read again anywhere in
        // this function; the identical quotient is recomputed into tmpU32no1
        // immediately below. The first division looks redundant.
157     tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2,
158                                                                   nShifts), 25);
159                                                      //Q14
160     tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts),
161                                     25); //Q14
162     // compute indicator function: sigmoid map
163     // FLOAT code
164     // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
165     //                          (threshPrior1 - tmpFloat1)) + 1.0);
166     tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
167     if (tableIndex < 16) {
168       tmp16no2 = kIndicatorTable[tableIndex];
169       tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
170       frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
171       tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
172       if (tmpIndFX) {
173         tmpIndFX = 8192 + tmp16no2; // Q14
174       } else {
175         tmpIndFX = 8192 - tmp16no2; // Q14
176       }
177     }
178     indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
179   }
180
181   //for template spectral-difference
182   if (inst->weightSpecDiff) {
183     tmpU32no1 = 0;
184     if (inst->featureSpecDiff) {
185       normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
186                                WebRtcSpl_NormU32(inst->featureSpecDiff));
187       tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp);
188                                                          // Q(normTmp-2*stages)
189       tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy,
190                                         20 - inst->stages - normTmp);
191       if (tmpU32no2 > 0) {
192         // Q(20 - inst->stages)
193         tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2);
194       } else {
            // Zero divisor: use 0x7fffffff in place of the quotient.
195         tmpU32no1 = (uint32_t)(0x7fffffff);
196       }
197     }
198     tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff,
199                                                       17),
200                                 25);
        // Unsigned subtraction; a set top bit in the wrapped result is used
        // below as the "tmpU32no1 < tmpU32no3" test.
201     tmpU32no2 = tmpU32no1 - tmpU32no3;
202     nShifts = 1;
203     tmpIndFX = 16384; // Q14(1.0)
204     //use larger width in tanh map for pause regions
205     if (tmpU32no2 & 0x80000000) {
206       tmpIndFX = 0;
207       tmpU32no2 = tmpU32no3 - tmpU32no1;
208       //widthPrior = widthPrior * 2.0;
209       nShifts--;
210     }
211     tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
212     // compute indicator function: sigmoid map
213     /* FLOAT code
214      indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
215      */
216     tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
217     if (tableIndex < 16) {
218       tmp16no2 = kIndicatorTable[tableIndex];
219       tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
220       frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
221       tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
222                     tmp16no1, frac, 14);
223       if (tmpIndFX) {
224         tmpIndFX = 8192 + tmp16no2;
225       } else {
226         tmpIndFX = 8192 - tmp16no2;
227       }
228     }
229     indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
230   }
231
232   //combine the indicator function with the feature weights
233   // FLOAT code
234   // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
235   //                 indicator1 + weightIndPrior2 * indicator2);
236   indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
237   // done with computing indicator function
238
239   //compute the prior probability
240   // FLOAT code
241   // inst->priorNonSpeechProb += PRIOR_UPDATE *
242   //                             (indPriorNonSpeech - inst->priorNonSpeechProb);
243   tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
244   inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
245                                 PRIOR_UPDATE_Q14, tmp16, 14); // Q14
246
247   //final speech probability: combine prior model with LR factor:
248
      // Default every bin to 0; only the bins handled in the loop below are
      // overwritten.
249   memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
250
251   if (inst->priorNonSpeechProb > 0) {
        // r0 = prior (Q14), r1 = 16384 - prior; both are read-only inputs to
        // the assembly in the loop.
252     r0 = inst->priorNonSpeechProb;
253     r1 = 16384 - r0;
254     int32_t const_23637 = 23637;
255     int32_t const_44 = 44;
256     int32_t const_84 = 84;
257     int32_t const_1 = 1;
258     int32_t const_neg8 = -8;
259     for (i = 0; i < inst->magnLen; i++) {
260       r2 = inst->logLrtTimeAvgW32[i];
261       if (r2 < 65300) {
            // The assembly below computes, with p = r0 and q = r1:
            //   t    = (r2 * 23637) >> 14;
            //   frac = t & 0xfff;
            //   ip   = max(t >> 12, -8);
            //   poly = ((frac * frac * 44) >> 19) + ((frac * 84) >> 7);
            //   inv  = (1 << (ip + 8)) + shift(poly, ip - 4);
            //          (shift = left for ip - 4 >= 0, else arithmetic right)
            //   n2   = (leading sign bits of (q << 16)) - 1, or 0 if that is >= 31;
            //   n1   = (leading sign bits of inv) - 1,
            //          forced to 0 when inv <= 0 and n2 == 0;
            //   s    = n1 + n2 - 7;
            //   if (s < 0) branch to label 1 (skip the rest);
            //   inv  = (s < 8) ? (((inv >> (8 - s)) * q) >> s)
            //                  : ((inv * q) >> 8);
            //   r3   = (p << 8) / (p + inv);             // unsigned divide
            // The instruction after bltz is its delay slot (a nop).
            // NOTE(review): 23637 / 16384 is about 1.4427, presumably log2(e),
            // making inv an approximation of 2^(...) -- confirm against the
            // generic C version.
262         __asm __volatile(
263           ".set         push                                      \n\t"
264           ".set         noreorder                                 \n\t"
265           "mul          %[r2],    %[r2],          %[const_23637]  \n\t"
266           "sll          %[r6],    %[r1],          16              \n\t"
267           "clz          %[r7],    %[r6]                           \n\t"
268           "clo          %[r8],    %[r6]                           \n\t"
269           "slt          %[r9],    %[r6],          $0              \n\t"
270           "movn         %[r7],    %[r8],          %[r9]           \n\t"
271           "sra          %[r2],    %[r2],          14              \n\t"
272           "andi         %[r3],    %[r2],          0xfff           \n\t"
273           "mul          %[r4],    %[r3],          %[r3]           \n\t"
274           "mul          %[r3],    %[r3],          %[const_84]     \n\t"
275           "sra          %[r2],    %[r2],          12              \n\t"
276           "slt          %[r5],    %[r2],          %[const_neg8]   \n\t"
277           "movn         %[r2],    %[const_neg8],  %[r5]           \n\t"
278           "mul          %[r4],    %[r4],          %[const_44]     \n\t"
279           "sra          %[r3],    %[r3],          7               \n\t"
280           "addiu        %[r7],    %[r7],          -1              \n\t"
281           "slti         %[r9],    %[r7],          31              \n\t"
282           "movz         %[r7],    $0,             %[r9]           \n\t"
283           "sra          %[r4],    %[r4],          19              \n\t"
284           "addu         %[r4],    %[r4],          %[r3]           \n\t"
285           "addiu        %[r3],    %[r2],          8               \n\t"
286           "addiu        %[r2],    %[r2],          -4              \n\t"
287           "neg          %[r5],    %[r2]                           \n\t"
288           "sllv         %[r6],    %[r4],          %[r2]           \n\t"
289           "srav         %[r5],    %[r4],          %[r5]           \n\t"
290           "slt          %[r2],    %[r2],          $0              \n\t"
291           "movn         %[r6],    %[r5],          %[r2]           \n\t"
292           "sllv         %[r3],    %[const_1],     %[r3]           \n\t"
293           "addu         %[r2],    %[r3],          %[r6]           \n\t"
294           "clz          %[r4],    %[r2]                           \n\t"
295           "clo          %[r5],    %[r2]                           \n\t"
296           "slt          %[r8],    %[r2],          $0              \n\t"
297           "movn         %[r4],    %[r5],          %[r8]           \n\t"
298           "addiu        %[r4],    %[r4],          -1              \n\t"
299           "slt          %[r5],    $0,             %[r2]           \n\t"
300           "or           %[r5],    %[r5],          %[r7]           \n\t"
301           "movz         %[r4],    $0,             %[r5]           \n\t"
302           "addiu        %[r6],    %[r7],          -7              \n\t"
303           "addu         %[r6],    %[r6],          %[r4]           \n\t"
304           "bltz         %[r6],    1f                              \n\t"
305           " nop                                                   \n\t"
306           "addiu        %[r4],    %[r6],          -8              \n\t"
307           "neg          %[r3],    %[r4]                           \n\t"
308           "srav         %[r5],    %[r2],          %[r3]           \n\t"
309           "mul          %[r5],    %[r5],          %[r1]           \n\t"
310           "mul          %[r2],    %[r2],          %[r1]           \n\t"
311           "slt          %[r4],    %[r4],          $0              \n\t"
312           "srav         %[r5],    %[r5],          %[r6]           \n\t"
313           "sra          %[r2],    %[r2],          8               \n\t"
314           "movn         %[r2],    %[r5],          %[r4]           \n\t"
315           "sll          %[r3],    %[r0],          8               \n\t"
316           "addu         %[r2],    %[r0],          %[r2]           \n\t"
317           "divu         %[r3],    %[r3],          %[r2]           \n\t"
318          "1:                                                      \n\t"
319           ".set         pop                                       \n\t"
320           : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4),
321             [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
322             [r8] "=&r" (r8), [r9] "=&r" (r9)
323           : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637),
324             [const_neg8] "r" (const_neg8), [const_84] "r" (const_84),
325             [const_1] "r" (const_1), [const_44] "r" (const_44)
326           : "hi", "lo"
327         );
            // r3 is narrowed to uint16_t by this store.
            // NOTE(review): when the bltz above is taken, r3 has not been
            // through the final divide and still holds 1 << (ip + 8) from the
            // earlier sllv; that value is what gets stored here. Confirm that
            // it always truncates to 0 on that path, matching the memset
            // default.
328         nonSpeechProbFinal[i] = r3;
329       }
330     }
331   }
332 }
333
334 // Update analysis buffer for lower band, and window data before FFT.
    //
    // Step 1 copies inst->analysisBuffer + inst->blockLen10ms to the start of
    // inst->analysisBuffer (anaLen - blockLen10ms items) and then copies
    // new_speech (blockLen10ms items) into the tail.
    // NOTE(review): WEBRTC_SPL_MEMCPY_W16 is defined elsewhere; presumably it
    // copies a count of 16-bit words -- confirm.
    // Step 2 writes the element-wise product of inst->window and
    // inst->analysisBuffer to out for inst->anaLen samples. In the plain MIPS32
    // path each output is (window[i] * analysisBuffer[i] + 0x2000) >> 14.
    // Two assembly variants are selected at compile time by MIPS_DSP_R1_LE
    // (with a further MIPS_DSP_R2_LE refinement inside it).
335 void WebRtcNsx_AnalysisUpdate_mips(NsxInst_t* inst,
336                                    int16_t* out,
337                                    int16_t* new_speech) {
338
      // iters / after are loop counters that the assembly itself initializes.
      // window, anaBuf and outBuf are working pointers advanced by the assembly.
      // NOTE(review): the DSP path accesses these through lw/sw, so it assumes
      // inst->window, inst->analysisBuffer and out are 4-byte aligned -- confirm.
339   int iters, after;
340   int anaLen = inst->anaLen;
341   int *window = (int*)inst->window;
342   int *anaBuf = (int*)inst->analysisBuffer;
343   int *outBuf = (int*)out;
344   int r0, r1, r2, r3, r4, r5, r6, r7;
345 #if defined(MIPS_DSP_R1_LE)
      // Extra scratch register used only by the DSP variant.
346   int r8;
347 #endif
348
349   // For lower band update analysis buffer.
350   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer,
351                         inst->analysisBuffer + inst->blockLen10ms,
352                         inst->anaLen - inst->blockLen10ms);
353   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer
354       + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms);
355
356   // Window data before FFT.
357 #if defined(MIPS_DSP_R1_LE)
      // DSP variant. Main loop (labels 1..2): anaLen >> 3 iterations, each
      // loading four 32-bit words (eight 16-bit samples) from window and
      // anaBuf, multiplying the halfword pairs with muleq_s.w.phl / .phr and
      // storing four packed words to outBuf.
      // With MIPS_DSP_R2_LE the products are narrowed and packed by
      // precr_sra_r.ph.w (shift 15); otherwise shra_r.w (shift 15), sll 16 and
      // packrl.ph are used for the same purpose.
      // Tail loop (labels 3..4): the remaining anaLen & 7 samples are handled
      // one at a time with lh / mul / shra_r.w by 14 / sh.
      // In each loop the instruction after "b" is its delay slot and performs
      // the counter decrement.
      // NOTE(review): per the MIPS DSP ASE, muleq_s.w.ph* is a saturating
      // fractional multiply (product << 1) and the *_r forms round, which
      // would make shift-by-15 here equivalent to the rounded shift-by-14 in
      // the tail loop -- confirm against the DSP ASE manual.
358   __asm __volatile(
359     ".set              push                                \n\t"
360     ".set              noreorder                           \n\t"
361     "sra               %[iters],   %[anaLen],    3         \n\t"
362    "1:                                                     \n\t"
363     "blez              %[iters],   2f                      \n\t"
364     " nop                                                  \n\t"
365     "lw                %[r0],      0(%[window])            \n\t"
366     "lw                %[r1],      0(%[anaBuf])            \n\t"
367     "lw                %[r2],      4(%[window])            \n\t"
368     "lw                %[r3],      4(%[anaBuf])            \n\t"
369     "lw                %[r4],      8(%[window])            \n\t"
370     "lw                %[r5],      8(%[anaBuf])            \n\t"
371     "lw                %[r6],      12(%[window])           \n\t"
372     "lw                %[r7],      12(%[anaBuf])           \n\t"
373     "muleq_s.w.phl     %[r8],      %[r0],        %[r1]     \n\t"
374     "muleq_s.w.phr     %[r0],      %[r0],        %[r1]     \n\t"
375     "muleq_s.w.phl     %[r1],      %[r2],        %[r3]     \n\t"
376     "muleq_s.w.phr     %[r2],      %[r2],        %[r3]     \n\t"
377     "muleq_s.w.phl     %[r3],      %[r4],        %[r5]     \n\t"
378     "muleq_s.w.phr     %[r4],      %[r4],        %[r5]     \n\t"
379     "muleq_s.w.phl     %[r5],      %[r6],        %[r7]     \n\t"
380     "muleq_s.w.phr     %[r6],      %[r6],        %[r7]     \n\t"
381 #if defined(MIPS_DSP_R2_LE)
382     "precr_sra_r.ph.w  %[r8],      %[r0],        15        \n\t"
383     "precr_sra_r.ph.w  %[r1],      %[r2],        15        \n\t"
384     "precr_sra_r.ph.w  %[r3],      %[r4],        15        \n\t"
385     "precr_sra_r.ph.w  %[r5],      %[r6],        15        \n\t"
386     "sw                %[r8],      0(%[outBuf])            \n\t"
387     "sw                %[r1],      4(%[outBuf])            \n\t"
388     "sw                %[r3],      8(%[outBuf])            \n\t"
389     "sw                %[r5],      12(%[outBuf])           \n\t"
390 #else
391     "shra_r.w          %[r8],      %[r8],        15        \n\t"
392     "shra_r.w          %[r0],      %[r0],        15        \n\t"
393     "shra_r.w          %[r1],      %[r1],        15        \n\t"
394     "shra_r.w          %[r2],      %[r2],        15        \n\t"
395     "shra_r.w          %[r3],      %[r3],        15        \n\t"
396     "shra_r.w          %[r4],      %[r4],        15        \n\t"
397     "shra_r.w          %[r5],      %[r5],        15        \n\t"
398     "shra_r.w          %[r6],      %[r6],        15        \n\t"
399     "sll               %[r0],      %[r0],        16        \n\t"
400     "sll               %[r2],      %[r2],        16        \n\t"
401     "sll               %[r4],      %[r4],        16        \n\t"
402     "sll               %[r6],      %[r6],        16        \n\t"
403     "packrl.ph         %[r0],      %[r8],        %[r0]     \n\t"
404     "packrl.ph         %[r2],      %[r1],        %[r2]     \n\t"
405     "packrl.ph         %[r4],      %[r3],        %[r4]     \n\t"
406     "packrl.ph         %[r6],      %[r5],        %[r6]     \n\t"
407     "sw                %[r0],      0(%[outBuf])            \n\t"
408     "sw                %[r2],      4(%[outBuf])            \n\t"
409     "sw                %[r4],      8(%[outBuf])            \n\t"
410     "sw                %[r6],      12(%[outBuf])           \n\t"
411 #endif
412     "addiu             %[window],  %[window],    16        \n\t"
413     "addiu             %[anaBuf],  %[anaBuf],    16        \n\t"
414     "addiu             %[outBuf],  %[outBuf],    16        \n\t"
415     "b                 1b                                  \n\t"
416     " addiu            %[iters],   %[iters],     -1        \n\t"
417    "2:                                                     \n\t"
418     "andi              %[after],   %[anaLen],    7         \n\t"
419    "3:                                                     \n\t"
420     "blez              %[after],   4f                      \n\t"
421     " nop                                                  \n\t"
422     "lh                %[r0],      0(%[window])            \n\t"
423     "lh                %[r1],      0(%[anaBuf])            \n\t"
424     "mul               %[r0],      %[r0],        %[r1]     \n\t"
425     "addiu             %[window],  %[window],    2         \n\t"
426     "addiu             %[anaBuf],  %[anaBuf],    2         \n\t"
427     "addiu             %[outBuf],  %[outBuf],    2         \n\t"
428     "shra_r.w          %[r0],      %[r0],        14        \n\t"
429     "sh                %[r0],      -2(%[outBuf])           \n\t"
430     "b                 3b                                  \n\t"
431     " addiu            %[after],   %[after],     -1        \n\t"
432    "4:                                                     \n\t"
433     ".set              pop                                 \n\t"
434     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
435       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
436       [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8),
437       [iters] "=&r" (iters), [after] "=&r" (after),
438       [window] "+r" (window),[anaBuf] "+r" (anaBuf),
439       [outBuf] "+r" (outBuf)
440     : [anaLen] "r" (anaLen)
441     : "memory", "hi", "lo"
442   );
443 #else
      // Plain MIPS32 variant. Main loop (labels 1..2): anaLen >> 2 iterations
      // of four samples each, using halfword loads/stores; every output is
      // (window * anaBuf + 0x2000) >> 14, i.e. a shift by 14 with the rounding
      // constant 0x2000 added first.
      // Tail loop (labels 3..4): the remaining anaLen & 3 samples, one at a
      // time, with the same formula. outBuf is advanced before the store,
      // hence the -2 offset on the sh.
444   __asm  __volatile(
445     ".set           push                                    \n\t"
446     ".set           noreorder                               \n\t"
447     "sra            %[iters],   %[anaLen],      2           \n\t"
448    "1:                                                      \n\t"
449     "blez           %[iters],   2f                          \n\t"
450     " nop                                                   \n\t"
451     "lh             %[r0],      0(%[window])                \n\t"
452     "lh             %[r1],      0(%[anaBuf])                \n\t"
453     "lh             %[r2],      2(%[window])                \n\t"
454     "lh             %[r3],      2(%[anaBuf])                \n\t"
455     "lh             %[r4],      4(%[window])                \n\t"
456     "lh             %[r5],      4(%[anaBuf])                \n\t"
457     "lh             %[r6],      6(%[window])                \n\t"
458     "lh             %[r7],      6(%[anaBuf])                \n\t"
459     "mul            %[r0],      %[r0],          %[r1]       \n\t"
460     "mul            %[r2],      %[r2],          %[r3]       \n\t"
461     "mul            %[r4],      %[r4],          %[r5]       \n\t"
462     "mul            %[r6],      %[r6],          %[r7]       \n\t"
463     "addiu          %[window],  %[window],      8           \n\t"
464     "addiu          %[anaBuf],  %[anaBuf],      8           \n\t"
465     "addiu          %[r0],      %[r0],          0x2000      \n\t"
466     "addiu          %[r2],      %[r2],          0x2000      \n\t"
467     "addiu          %[r4],      %[r4],          0x2000      \n\t"
468     "addiu          %[r6],      %[r6],          0x2000      \n\t"
469     "sra            %[r0],      %[r0],          14          \n\t"
470     "sra            %[r2],      %[r2],          14          \n\t"
471     "sra            %[r4],      %[r4],          14          \n\t"
472     "sra            %[r6],      %[r6],          14          \n\t"
473     "sh             %[r0],      0(%[outBuf])                \n\t"
474     "sh             %[r2],      2(%[outBuf])                \n\t"
475     "sh             %[r4],      4(%[outBuf])                \n\t"
476     "sh             %[r6],      6(%[outBuf])                \n\t"
477     "addiu          %[outBuf],  %[outBuf],      8           \n\t"
478     "b              1b                                      \n\t"
479     " addiu         %[iters],   %[iters],       -1          \n\t"
480    "2:                                                      \n\t"
481     "andi           %[after],   %[anaLen],      3           \n\t"
482    "3:                                                      \n\t"
483     "blez           %[after],   4f                          \n\t"
484     " nop                                                   \n\t"
485     "lh             %[r0],      0(%[window])                \n\t"
486     "lh             %[r1],      0(%[anaBuf])                \n\t"
487     "mul            %[r0],      %[r0],          %[r1]       \n\t"
488     "addiu          %[window],  %[window],      2           \n\t"
489     "addiu          %[anaBuf],  %[anaBuf],      2           \n\t"
490     "addiu          %[outBuf],  %[outBuf],      2           \n\t"
491     "addiu          %[r0],      %[r0],          0x2000      \n\t"
492     "sra            %[r0],      %[r0],          14          \n\t"
493     "sh             %[r0],      -2(%[outBuf])               \n\t"
494     "b              3b                                      \n\t"
495     " addiu         %[after],   %[after],       -1          \n\t"
496    "4:                                                      \n\t"
497     ".set           pop                                     \n\t"
498     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
499       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
500       [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters),
501       [after] "=&r" (after), [window] "+r" (window),
502       [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf)
503     : [anaLen] "r" (anaLen)
504     : "memory", "hi", "lo"
505   );
506 #endif
507 }
508
509 // For the noise suppression process, synthesis, read out fully processed
510 // segment, and update synthesis buffer.
511 void WebRtcNsx_SynthesisUpdate_mips(NsxInst_t* inst,
512                                     int16_t* out_frame,
513                                     int16_t gain_factor) {
514
515   int iters = inst->blockLen10ms >> 2;
516   int after = inst->blockLen10ms & 3;
517   int r0, r1, r2, r3, r4, r5, r6, r7;
518   int16_t *window = (int16_t*)inst->window;
519   int16_t *real = inst->real;
520   int16_t *synthBuf = inst->synthesisBuffer;
521   int16_t *out = out_frame;
522   int sat_pos = 0x7fff;
523   int sat_neg = 0xffff8000;
524   int block10 = (int)inst->blockLen10ms;
525   int anaLen = (int)inst->anaLen;
526
527   __asm __volatile(
528     ".set       push                                        \n\t"
529     ".set       noreorder                                   \n\t"
530    "1:                                                      \n\t"
531     "blez       %[iters],   2f                              \n\t"
532     " nop                                                   \n\t"
533     "lh         %[r0],      0(%[window])                    \n\t"
534     "lh         %[r1],      0(%[real])                      \n\t"
535     "lh         %[r2],      2(%[window])                    \n\t"
536     "lh         %[r3],      2(%[real])                      \n\t"
537     "lh         %[r4],      4(%[window])                    \n\t"
538     "lh         %[r5],      4(%[real])                      \n\t"
539     "lh         %[r6],      6(%[window])                    \n\t"
540     "lh         %[r7],      6(%[real])                      \n\t"
541     "mul        %[r0],      %[r0],          %[r1]           \n\t"
542     "mul        %[r2],      %[r2],          %[r3]           \n\t"
543     "mul        %[r4],      %[r4],          %[r5]           \n\t"
544     "mul        %[r6],      %[r6],          %[r7]           \n\t"
545     "addiu      %[r0],      %[r0],          0x2000          \n\t"
546     "addiu      %[r2],      %[r2],          0x2000          \n\t"
547     "addiu      %[r4],      %[r4],          0x2000          \n\t"
548     "addiu      %[r6],      %[r6],          0x2000          \n\t"
549     "sra        %[r0],      %[r0],          14              \n\t"
550     "sra        %[r2],      %[r2],          14              \n\t"
551     "sra        %[r4],      %[r4],          14              \n\t"
552     "sra        %[r6],      %[r6],          14              \n\t"
553     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
554     "mul        %[r2],      %[r2],          %[gain_factor]  \n\t"
555     "mul        %[r4],      %[r4],          %[gain_factor]  \n\t"
556     "mul        %[r6],      %[r6],          %[gain_factor]  \n\t"
557     "addiu      %[r0],      %[r0],          0x1000          \n\t"
558     "addiu      %[r2],      %[r2],          0x1000          \n\t"
559     "addiu      %[r4],      %[r4],          0x1000          \n\t"
560     "addiu      %[r6],      %[r6],          0x1000          \n\t"
561     "sra        %[r0],      %[r0],          13              \n\t"
562     "sra        %[r2],      %[r2],          13              \n\t"
563     "sra        %[r4],      %[r4],          13              \n\t"
564     "sra        %[r6],      %[r6],          13              \n\t"
565     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
566     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
567     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
568     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
569     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
570     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
571     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
572     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
573     "lh         %[r1],      0(%[synthBuf])                  \n\t"
574     "lh         %[r3],      2(%[synthBuf])                  \n\t"
575     "lh         %[r5],      4(%[synthBuf])                  \n\t"
576     "lh         %[r7],      6(%[synthBuf])                  \n\t"
577     "addu       %[r0],      %[r0],          %[r1]           \n\t"
578     "addu       %[r2],      %[r2],          %[r3]           \n\t"
579     "addu       %[r4],      %[r4],          %[r5]           \n\t"
580     "addu       %[r6],      %[r6],          %[r7]           \n\t"
581     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
582     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
583     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
584     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
585     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
586     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
587     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
588     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
589     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
590     "slt        %[r3],      %[r2],          %[sat_neg]      \n\t"
591     "slt        %[r5],      %[r4],          %[sat_neg]      \n\t"
592     "slt        %[r7],      %[r6],          %[sat_neg]      \n\t"
593     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
594     "movn       %[r2],      %[sat_neg],     %[r3]           \n\t"
595     "movn       %[r4],      %[sat_neg],     %[r5]           \n\t"
596     "movn       %[r6],      %[sat_neg],     %[r7]           \n\t"
597     "sh         %[r0],      0(%[synthBuf])                  \n\t"
598     "sh         %[r2],      2(%[synthBuf])                  \n\t"
599     "sh         %[r4],      4(%[synthBuf])                  \n\t"
600     "sh         %[r6],      6(%[synthBuf])                  \n\t"
601     "sh         %[r0],      0(%[out])                       \n\t"
602     "sh         %[r2],      2(%[out])                       \n\t"
603     "sh         %[r4],      4(%[out])                       \n\t"
604     "sh         %[r6],      6(%[out])                       \n\t"
605     "addiu      %[window],  %[window],      8               \n\t"
606     "addiu      %[real],    %[real],        8               \n\t"
607     "addiu      %[synthBuf],%[synthBuf],    8               \n\t"
608     "addiu      %[out],     %[out],         8               \n\t"
609     "b          1b                                          \n\t"
610     " addiu     %[iters],   %[iters],       -1              \n\t"
611    "2:                                                      \n\t"
612     "blez       %[after],   3f                              \n\t"
613     " subu      %[block10], %[anaLen],      %[block10]      \n\t"
614     "lh         %[r0],      0(%[window])                    \n\t"
615     "lh         %[r1],      0(%[real])                      \n\t"
616     "mul        %[r0],      %[r0],          %[r1]           \n\t"
617     "addiu      %[window],  %[window],      2               \n\t"
618     "addiu      %[real],    %[real],        2               \n\t"
619     "addiu      %[r0],      %[r0],          0x2000          \n\t"
620     "sra        %[r0],      %[r0],          14              \n\t"
621     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
622     "addiu      %[r0],      %[r0],          0x1000          \n\t"
623     "sra        %[r0],      %[r0],          13              \n\t"
624     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
625     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
626     "lh         %[r1],      0(%[synthBuf])                  \n\t"
627     "addu       %[r0],      %[r0],          %[r1]           \n\t"
628     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
629     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
630     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
631     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
632     "sh         %[r0],      0(%[synthBuf])                  \n\t"
633     "sh         %[r0],      0(%[out])                       \n\t"
634     "addiu      %[synthBuf],%[synthBuf],    2               \n\t"
635     "addiu      %[out],     %[out],         2               \n\t"
636     "b          2b                                          \n\t"
637     " addiu     %[after],   %[after],       -1              \n\t"
638    "3:                                                      \n\t"
639     "sra        %[iters],   %[block10],     2               \n\t"
640    "4:                                                      \n\t"
641     "blez       %[iters],   5f                              \n\t"
642     " andi      %[after],   %[block10],     3               \n\t"
643     "lh         %[r0],      0(%[window])                    \n\t"
644     "lh         %[r1],      0(%[real])                      \n\t"
645     "lh         %[r2],      2(%[window])                    \n\t"
646     "lh         %[r3],      2(%[real])                      \n\t"
647     "lh         %[r4],      4(%[window])                    \n\t"
648     "lh         %[r5],      4(%[real])                      \n\t"
649     "lh         %[r6],      6(%[window])                    \n\t"
650     "lh         %[r7],      6(%[real])                      \n\t"
651     "mul        %[r0],      %[r0],          %[r1]           \n\t"
652     "mul        %[r2],      %[r2],          %[r3]           \n\t"
653     "mul        %[r4],      %[r4],          %[r5]           \n\t"
654     "mul        %[r6],      %[r6],          %[r7]           \n\t"
655     "addiu      %[r0],      %[r0],          0x2000          \n\t"
656     "addiu      %[r2],      %[r2],          0x2000          \n\t"
657     "addiu      %[r4],      %[r4],          0x2000          \n\t"
658     "addiu      %[r6],      %[r6],          0x2000          \n\t"
659     "sra        %[r0],      %[r0],          14              \n\t"
660     "sra        %[r2],      %[r2],          14              \n\t"
661     "sra        %[r4],      %[r4],          14              \n\t"
662     "sra        %[r6],      %[r6],          14              \n\t"
663     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
664     "mul        %[r2],      %[r2],          %[gain_factor]  \n\t"
665     "mul        %[r4],      %[r4],          %[gain_factor]  \n\t"
666     "mul        %[r6],      %[r6],          %[gain_factor]  \n\t"
667     "addiu      %[r0],      %[r0],          0x1000          \n\t"
668     "addiu      %[r2],      %[r2],          0x1000          \n\t"
669     "addiu      %[r4],      %[r4],          0x1000          \n\t"
670     "addiu      %[r6],      %[r6],          0x1000          \n\t"
671     "sra        %[r0],      %[r0],          13              \n\t"
672     "sra        %[r2],      %[r2],          13              \n\t"
673     "sra        %[r4],      %[r4],          13              \n\t"
674     "sra        %[r6],      %[r6],          13              \n\t"
675     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
676     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
677     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
678     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
679     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
680     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
681     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
682     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
683     "lh         %[r1],      0(%[synthBuf])                  \n\t"
684     "lh         %[r3],      2(%[synthBuf])                  \n\t"
685     "lh         %[r5],      4(%[synthBuf])                  \n\t"
686     "lh         %[r7],      6(%[synthBuf])                  \n\t"
687     "addu       %[r0],      %[r0],          %[r1]           \n\t"
688     "addu       %[r2],      %[r2],          %[r3]           \n\t"
689     "addu       %[r4],      %[r4],          %[r5]           \n\t"
690     "addu       %[r6],      %[r6],          %[r7]           \n\t"
691     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
692     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
693     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
694     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
695     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
696     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
697     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
698     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
699     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
700     "slt        %[r3],      %[r2],          %[sat_neg]      \n\t"
701     "slt        %[r5],      %[r4],          %[sat_neg]      \n\t"
702     "slt        %[r7],      %[r6],          %[sat_neg]      \n\t"
703     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
704     "movn       %[r2],      %[sat_neg],     %[r3]           \n\t"
705     "movn       %[r4],      %[sat_neg],     %[r5]           \n\t"
706     "movn       %[r6],      %[sat_neg],     %[r7]           \n\t"
707     "sh         %[r0],      0(%[synthBuf])                  \n\t"
708     "sh         %[r2],      2(%[synthBuf])                  \n\t"
709     "sh         %[r4],      4(%[synthBuf])                  \n\t"
710     "sh         %[r6],      6(%[synthBuf])                  \n\t"
711     "addiu      %[window],  %[window],      8               \n\t"
712     "addiu      %[real],    %[real],        8               \n\t"
713     "addiu      %[synthBuf],%[synthBuf],    8               \n\t"
714     "b          4b                                          \n\t"
715     " addiu     %[iters],   %[iters],       -1              \n\t"
716    "5:                                                      \n\t"
717     "blez       %[after],   6f                              \n\t"
718     " nop                                                   \n\t"
719     "lh         %[r0],      0(%[window])                    \n\t"
720     "lh         %[r1],      0(%[real])                      \n\t"
721     "mul        %[r0],      %[r0],          %[r1]           \n\t"
722     "addiu      %[window],  %[window],      2               \n\t"
723     "addiu      %[real],    %[real],        2               \n\t"
724     "addiu      %[r0],      %[r0],          0x2000          \n\t"
725     "sra        %[r0],      %[r0],          14              \n\t"
726     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
727     "addiu      %[r0],      %[r0],          0x1000          \n\t"
728     "sra        %[r0],      %[r0],          13              \n\t"
729     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
730     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
731     "lh         %[r1],      0(%[synthBuf])                  \n\t"
732     "addu       %[r0],      %[r0],          %[r1]           \n\t"
733     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
734     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
735     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
736     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
737     "sh         %[r0],      0(%[synthBuf])                  \n\t"
738     "addiu      %[synthBuf],%[synthBuf],    2               \n\t"
739     "b          2b                                          \n\t"
740     " addiu     %[after],   %[after],       -1              \n\t"
741    "6:                                                      \n\t"
742     ".set       pop                                         \n\t"
743     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
744       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
745       [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters),
746       [after] "+r" (after), [block10] "+r" (block10),
747       [window] "+r" (window), [real] "+r" (real),
748       [synthBuf] "+r" (synthBuf), [out] "+r" (out)
749     : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos),
750       [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen)
751     : "memory", "hi", "lo"
752   );
753
754   // update synthesis buffer
755   WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer,
756                         inst->synthesisBuffer + inst->blockLen10ms,
757                         inst->anaLen - inst->blockLen10ms);
758   WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
759       + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
760 }
761
762 // Filter the data in the frequency domain, and create spectrum.
    //
    // Every bin of inst->real / inst->imag is multiplied by the matching
    // inst->noiseSupFilter entry and arithmetically shifted right by 14; the
    // result is written back in place. The same values are also laid out in
    // |freq_buf| as interleaved 16-bit pairs:
    //   - from the front (freq_buf_f): (real, -imag) for each bin, in order;
    //   - from the back (freq_buf_s, starting at index 2 * anaLen - 4 and
    //     moving down): (real, imag) for each bin, i.e. the mirrored half.
    // Bin 0 and the final bin are written to the front half only.
    //
    // The main loop handles two bins per pass and always runs at least once,
    // because its condition (loop_count < inst->anaLen2) is tested at the
    // bottom of the loop.
    // NOTE(review): the total bin count (1 + 2 per pass + 2) presumably relies
    // on inst->anaLen2 being even and large enough - confirm against callers.
763 void WebRtcNsx_PrepareSpectrum_mips(NsxInst_t* inst, int16_t* freq_buf) {
764
765   uint16_t *noiseSupFilter = inst->noiseSupFilter;
766   int16_t *real = inst->real;
767   int16_t *imag = inst->imag;
      // Loop counter for the two-bins-per-pass main loop; starts at 2 and is
      // compared against inst->anaLen2.
768   int32_t loop_count = 2;
      // Scratch registers for the asm. They are declared int16_t although they
      // briefly hold full-width products inside the asm; none of them is read
      // after the asm statement.
769   int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6;
      // Index of the first mirrored pair slot written by the main loop.
770   int16_t tmp16 = (inst->anaLen << 1) - 4;
      // freq_buf_f walks forward from the start of freq_buf, freq_buf_s walks
      // backward from freq_buf[tmp16].
771   int16_t* freq_buf_f = freq_buf;
772   int16_t* freq_buf_s = &freq_buf[tmp16];
773
774   __asm __volatile (
775     ".set       push                                                 \n\t"
776     ".set       noreorder                                            \n\t"
777     // First sample (bin 0): filter real/imag in place and write
        // (real, -imag) to the first pair of freq_buf. The filter value is
        // fetched with lh, i.e. as a signed halfword.
778     "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
779     "lh         %[tmp_2],           0(%[real])                       \n\t"
780     "lh         %[tmp_3],           0(%[imag])                       \n\t"
781     "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
782     "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
783     "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
784     "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
785     "sh         %[tmp_2],           0(%[real])                       \n\t"
786     "sh         %[tmp_3],           0(%[imag])                       \n\t"
787     "negu       %[tmp_3],           %[tmp_3]                         \n\t"
788     "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
789     "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
790     "addiu      %[real],            %[real],              2          \n\t"
791     "addiu      %[imag],            %[imag],              2          \n\t"
792     "addiu      %[noiseSupFilter],  %[noiseSupFilter],    2          \n\t"
793     "addiu      %[freq_buf_f],      %[freq_buf_f],        4          \n\t"
        // Main loop: two consecutive bins per pass. tmp_1..tmp_3 hold the
        // filter/real/imag of the first bin, tmp_4..tmp_6 those of the second.
794    "1:                                                               \n\t"
795     "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
796     "lh         %[tmp_2],           0(%[real])                       \n\t"
797     "lh         %[tmp_3],           0(%[imag])                       \n\t"
798     "lh         %[tmp_4],           2(%[noiseSupFilter])             \n\t"
799     "lh         %[tmp_5],           2(%[real])                       \n\t"
800     "lh         %[tmp_6],           2(%[imag])                       \n\t"
801     "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
802     "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
803     "mul        %[tmp_5],           %[tmp_5],             %[tmp_4]   \n\t"
804     "mul        %[tmp_6],           %[tmp_6],             %[tmp_4]   \n\t"
805     "addiu      %[loop_count],      %[loop_count],        2          \n\t"
806     "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
807     "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
808     "sra        %[tmp_5],           %[tmp_5],             14         \n\t"
809     "sra        %[tmp_6],           %[tmp_6],             14         \n\t"
810     "addiu      %[noiseSupFilter],  %[noiseSupFilter],    4          \n\t"
        // Write the filtered values back in place and into the mirrored half.
        // The first bin of the pair lands in the higher mirrored slot
        // (offsets 4/6), the second in the lower one (offsets 0/2). Each imag
        // value is negated only after its un-negated mirrored copy is stored.
811     "sh         %[tmp_2],           0(%[real])                       \n\t"
812     "sh         %[tmp_2],           4(%[freq_buf_s])                 \n\t"
813     "sh         %[tmp_3],           0(%[imag])                       \n\t"
814     "sh         %[tmp_3],           6(%[freq_buf_s])                 \n\t"
815     "negu       %[tmp_3],           %[tmp_3]                         \n\t"
816     "sh         %[tmp_5],           2(%[real])                       \n\t"
817     "sh         %[tmp_5],           0(%[freq_buf_s])                 \n\t"
818     "sh         %[tmp_6],           2(%[imag])                       \n\t"
819     "sh         %[tmp_6],           2(%[freq_buf_s])                 \n\t"
820     "negu       %[tmp_6],           %[tmp_6]                         \n\t"
821     "addiu      %[freq_buf_s],      %[freq_buf_s],        -8         \n\t"
822     "addiu      %[real],            %[real],              4          \n\t"
823     "addiu      %[imag],            %[imag],              4          \n\t"
        // Front half: (real, -imag) for both bins of the pair.
824     "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
825     "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
826     "sh         %[tmp_5],           4(%[freq_buf_f])                 \n\t"
827     "sh         %[tmp_6],           6(%[freq_buf_f])                 \n\t"
        // The addiu below sits in the branch delay slot (noreorder), so
        // freq_buf_f advances whether or not the branch is taken.
828     "blt        %[loop_count],      %[loop_size],         1b         \n\t"
829     " addiu     %[freq_buf_f],      %[freq_buf_f],        8          \n\t"
830     // Last two samples: the first is handled like a loop bin (front and
        // mirrored copies); the second is written to the front half only.
        // NOTE(review): unlike every other bin, the final bin's imaginary part
        // (tmp_6) is stored to freq_buf_f without negation. This only matters
        // if that value can be non-zero - confirm whether it is intentional.
831     "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
832     "lh         %[tmp_2],           0(%[real])                       \n\t"
833     "lh         %[tmp_3],           0(%[imag])                       \n\t"
834     "lh         %[tmp_4],           2(%[noiseSupFilter])             \n\t"
835     "lh         %[tmp_5],           2(%[real])                       \n\t"
836     "lh         %[tmp_6],           2(%[imag])                       \n\t"
837     "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
838     "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
839     "mul        %[tmp_5],           %[tmp_5],             %[tmp_4]   \n\t"
840     "mul        %[tmp_6],           %[tmp_6],             %[tmp_4]   \n\t"
841     "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
842     "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
843     "sra        %[tmp_5],           %[tmp_5],             14         \n\t"
844     "sra        %[tmp_6],           %[tmp_6],             14         \n\t"
845     "sh         %[tmp_2],           0(%[real])                       \n\t"
846     "sh         %[tmp_2],           4(%[freq_buf_s])                 \n\t"
847     "sh         %[tmp_3],           0(%[imag])                       \n\t"
848     "sh         %[tmp_3],           6(%[freq_buf_s])                 \n\t"
849     "negu       %[tmp_3],           %[tmp_3]                         \n\t"
850     "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
851     "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
852     "sh         %[tmp_5],           4(%[freq_buf_f])                 \n\t"
853     "sh         %[tmp_6],           6(%[freq_buf_f])                 \n\t"
854     "sh         %[tmp_5],           2(%[real])                       \n\t"
855     "sh         %[tmp_6],           2(%[imag])                       \n\t"
856     ".set       pop                                                  \n\t"
        // Pointers and the counter are read-write ("+r") because the asm
        // advances them; the scratch values are early-clobber outputs ("=&r").
857     : [real] "+r" (real), [imag] "+r" (imag),
858       [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s),
859       [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter),
860       [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3),
861       [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6)
862     : [loop_size] "r" (inst->anaLen2)
        // hi/lo are declared clobbered, presumably on account of the mul
        // instructions; "memory" covers the stores through the pointers.
863     : "memory", "hi", "lo"
864   );
865 }
866
#if defined(MIPS_DSP_R1_LE)
// Denormalize the real-valued signal |in|, the output from inverse FFT.
//
// Each of the inst->anaLen samples of |in| is shifted by
// (factor - inst->normData) and stored to inst->real:
//   - shift >= 0: saturating DSP left shift (shllv_s.ph); only the low
//     halfword of the result is stored;
//   - shift <  0: arithmetic right shift (srav) by the negated amount.
// Samples are processed four at a time, followed by a one-at-a-time loop for
// the remaining (anaLen & 3) samples.
//
// Parameters:
//   inst   - noise suppression instance; anaLen and normData are read, real[]
//            is written.
//   in     - input samples, at least inst->anaLen entries.
//   factor - scaling exponent; the applied shift is factor - inst->normData.
void WebRtcNsx_Denormalize_mips(NsxInst_t* inst, int16_t* in, int factor) {
  int32_t r0, r1, r2, r3, t0;
  int len = inst->anaLen;
  int16_t *out = &inst->real[0];
  int shift = factor - inst->normData;

  __asm __volatile (
    ".set          push                                \n\t"
    ".set          noreorder                           \n\t"
    // Nothing to do for an empty buffer.
    "beqz          %[len],     8f                      \n\t"
    " nop                                              \n\t"
    // Negative shift -> right-shift path at 4. The delay slot computes the
    // number of 4-sample groups for either path.
    "bltz          %[shift],   4f                      \n\t"
    " sra          %[t0],      %[len],      2          \n\t"
    // Left-shift path. The delay slot leaves the remainder count in len.
    "beqz          %[t0],      2f                      \n\t"
    " andi         %[len],     %[len],      3          \n\t"
   "1:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "lh            %[r1],      2(%[in])                \n\t"
    "lh            %[r2],      4(%[in])                \n\t"
    "lh            %[r3],      6(%[in])                \n\t"
    "shllv_s.ph    %[r0],      %[r0],       %[shift]   \n\t"
    "shllv_s.ph    %[r1],      %[r1],       %[shift]   \n\t"
    "shllv_s.ph    %[r2],      %[r2],       %[shift]   \n\t"
    "shllv_s.ph    %[r3],      %[r3],       %[shift]   \n\t"
    "addiu         %[in],      %[in],       8          \n\t"
    "addiu         %[t0],      %[t0],       -1         \n\t"
    "sh            %[r0],      0(%[out])               \n\t"
    "sh            %[r1],      2(%[out])               \n\t"
    "sh            %[r2],      4(%[out])               \n\t"
    "sh            %[r3],      6(%[out])               \n\t"
    "bgtz          %[t0],      1b                      \n\t"
    " addiu        %[out],     %[out],      8          \n\t"
   "2:                                                 \n\t"
    "beqz          %[len],     8f                      \n\t"
    " nop                                              \n\t"
    // Left-shift remainder loop; the store sits in the delay slot, hence the
    // -2 offset after out has already been advanced.
   "3:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "addiu         %[in],      %[in],       2          \n\t"
    "addiu         %[len],     %[len],      -1         \n\t"
    "shllv_s.ph    %[r0],      %[r0],       %[shift]   \n\t"
    "addiu         %[out],     %[out],      2          \n\t"
    "bgtz          %[len],     3b                      \n\t"
    " sh           %[r0],      -2(%[out])              \n\t"
    "b             8f                                  \n\t"
    // Explicit delay slot, so the negu at label 4 is not executed on the way
    // out of the left-shift path.
    " nop                                              \n\t"
    // Right-shift path: turn the negative shift into a positive count.
   "4:                                                 \n\t"
    "negu          %[shift],   %[shift]                \n\t"
    "beqz          %[t0],      6f                      \n\t"
    " andi         %[len],     %[len],      3          \n\t"
   "5:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "lh            %[r1],      2(%[in])                \n\t"
    "lh            %[r2],      4(%[in])                \n\t"
    "lh            %[r3],      6(%[in])                \n\t"
    "srav          %[r0],      %[r0],       %[shift]   \n\t"
    "srav          %[r1],      %[r1],       %[shift]   \n\t"
    "srav          %[r2],      %[r2],       %[shift]   \n\t"
    "srav          %[r3],      %[r3],       %[shift]   \n\t"
    "addiu         %[in],      %[in],       8          \n\t"
    "addiu         %[t0],      %[t0],       -1         \n\t"
    "sh            %[r0],      0(%[out])               \n\t"
    "sh            %[r1],      2(%[out])               \n\t"
    "sh            %[r2],      4(%[out])               \n\t"
    "sh            %[r3],      6(%[out])               \n\t"
    "bgtz          %[t0],      5b                      \n\t"
    " addiu        %[out],     %[out],      8          \n\t"
   "6:                                                 \n\t"
    "beqz          %[len],     8f                      \n\t"
    " nop                                              \n\t"
    // Right-shift remainder loop.
   "7:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "addiu         %[in],      %[in],       2          \n\t"
    "addiu         %[len],     %[len],      -1         \n\t"
    "srav          %[r0],      %[r0],       %[shift]   \n\t"
    "addiu         %[out],     %[out],      2          \n\t"
    "bgtz          %[len],     7b                      \n\t"
    " sh           %[r0],      -2(%[out])              \n\t"
   "8:                                                 \n\t"
    ".set          pop                                 \n\t"
    // len, shift, in and out are all modified by the asm, so they must be
    // read-write operands; declaring them input-only would let the compiler
    // assume their registers are unchanged afterwards.
    : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
      [r2] "=&r" (r2), [r3] "=&r" (r3), [len] "+r" (len),
      [shift] "+r" (shift), [in] "+r" (in), [out] "+r" (out)
    :
    : "memory"
  );
}
#endif
955
956 // Normalize the real-valued signal |in|, the input to forward FFT.
957 void WebRtcNsx_NormalizeRealBuffer_mips(NsxInst_t* inst,
958                                         const int16_t* in,
959                                         int16_t* out) {
960   int32_t r0, r1, r2, r3, t0;
961   int len = inst->anaLen;
962   int shift = inst->normData;
963
964   __asm __volatile (
965     ".set          push                                \n\t"
966     ".set          noreorder                           \n\t"
967     "beqz          %[len],     4f                      \n\t"
968     " sra          %[t0],      %[len],      2          \n\t"
969     "beqz          %[t0],      2f                      \n\t"
970     " andi         %[len],     %[len],      3          \n\t"
971    "1:                                                 \n\t"
972     "lh            %[r0],      0(%[in])                \n\t"
973     "lh            %[r1],      2(%[in])                \n\t"
974     "lh            %[r2],      4(%[in])                \n\t"
975     "lh            %[r3],      6(%[in])                \n\t"
976     "sllv          %[r0],      %[r0],       %[shift]   \n\t"
977     "sllv          %[r1],      %[r1],       %[shift]   \n\t"
978     "sllv          %[r2],      %[r2],       %[shift]   \n\t"
979     "sllv          %[r3],      %[r3],       %[shift]   \n\t"
980     "addiu         %[in],      %[in],       8          \n\t"
981     "addiu         %[t0],      %[t0],       -1         \n\t"
982     "sh            %[r0],      0(%[out])               \n\t"
983     "sh            %[r1],      2(%[out])               \n\t"
984     "sh            %[r2],      4(%[out])               \n\t"
985     "sh            %[r3],      6(%[out])               \n\t"
986     "bgtz          %[t0],      1b                      \n\t"
987     " addiu        %[out],     %[out],      8          \n\t"
988    "2:                                                 \n\t"
989     "beqz          %[len],     4f                      \n\t"
990     " nop                                              \n\t"
991    "3:                                                 \n\t"
992     "lh            %[r0],      0(%[in])                \n\t"
993     "addiu         %[in],      %[in],       2          \n\t"
994     "addiu         %[len],     %[len],      -1         \n\t"
995     "sllv          %[r0],      %[r0],       %[shift]   \n\t"
996     "addiu         %[out],     %[out],      2          \n\t"
997     "bgtz          %[len],     3b                      \n\t"
998     " sh           %[r0],      -2(%[out])              \n\t"
999    "4:                                                 \n\t"
1000     ".set          pop                                 \n\t"
1001     : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
1002       [r2] "=&r" (r2), [r3] "=&r" (r3)
1003     : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
1004       [out] "r" (out)
1005     : "memory"
1006   );
1007 }
1008