src/third_party/webrtc/modules/audio_processing/ns/nsx_core_mips.c

   1 /*
   2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
  12 #include "webrtc/modules/audio_processing/ns/nsx_core.h"
  13
  14 static const int16_t kIndicatorTable[17] = {
  15   0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
  16   7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
  17 };
  18
  19 // Compute speech/noise probability
  20 // speech/noise probability is returned in: probSpeechFinal
  21 //snrLocPrior is the prior SNR for each frequency (in Q11)
  22 //snrLocPost is the post SNR for each frequency (in Q11)
  23 void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
  24                                uint16_t* nonSpeechProbFinal,
  25                                uint32_t* priorLocSnr,
  26                                uint32_t* postLocSnr) {
  27
  28   uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
  29   int32_t indPriorFX, tmp32no1;
  30   int32_t logLrtTimeAvgKsumFX;
  31   int16_t indPriorFX16;
  32   int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac;
  33   int i, normTmp, nShifts;
  34
  35   int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
  36   int32_t const_max = 0x7fffffff;
  37   int32_t const_neg43 = -43;
  38   int32_t const_5412 = 5412;
  39   int32_t const_11rsh12 = (11 << 12);
  40   int32_t const_178 = 178;
  41
  42
  43   // compute feature based on average LR factor
  44   // this is the average over all frequencies of the smooth log LRT
  45   logLrtTimeAvgKsumFX = 0;
  46   for (i = 0; i < inst->magnLen; i++) {
  47     r0 = postLocSnr[i]; // Q11
  48     r1 = priorLocSnr[i];
  49     r2 = inst->logLrtTimeAvgW32[i];
  50
  51     __asm __volatile(
  52       ".set       push                                    \n\t"
  53       ".set       noreorder                               \n\t"
  54       "clz        %[r3],    %[r0]                         \n\t"
  55       "clz        %[r5],    %[r1]                         \n\t"
  56       "slti       %[r4],    %[r3],    32                  \n\t"
  57       "slti       %[r6],    %[r5],    32                  \n\t"
  58       "movz       %[r3],    $0,       %[r4]               \n\t"
  59       "movz       %[r5],    $0,       %[r6]               \n\t"
  60       "slti       %[r4],    %[r3],    11                  \n\t"
  61       "addiu      %[r6],    %[r3],    -11                 \n\t"
  62       "neg        %[r7],    %[r6]                         \n\t"
  63       "sllv       %[r6],    %[r1],    %[r6]               \n\t"
  64       "srav       %[r7],    %[r1],    %[r7]               \n\t"
  65       "movn       %[r6],    %[r7],    %[r4]               \n\t"
  66       "sllv       %[r1],    %[r1],    %[r5]               \n\t"
  67       "and        %[r1],    %[r1],    %[const_max]        \n\t"
  68       "sra        %[r1],    %[r1],    19                  \n\t"
  69       "mul        %[r7],    %[r1],    %[r1]               \n\t"
  70       "sllv       %[r3],    %[r0],    %[r3]               \n\t"
  71       "divu       %[r8],    %[r3],    %[r6]               \n\t"
  72       "slti       %[r6],    %[r6],    1                   \n\t"
  73       "mul        %[r7],    %[r7],    %[const_neg43]      \n\t"
  74       "sra        %[r7],    %[r7],    19                  \n\t"
  75       "movz       %[r3],    %[r8],    %[r6]               \n\t"
  76       "subu       %[r0],    %[r0],    %[r3]               \n\t"
  77       "mul        %[r1],    %[r1],    %[const_5412]       \n\t"
  78       "sra        %[r1],    %[r1],    12                  \n\t"
  79       "addu       %[r7],    %[r7],    %[r1]               \n\t"
  80       "addiu      %[r1],    %[r7],    37                  \n\t"
  81       "addiu      %[r5],    %[r5],    -31                 \n\t"
  82       "neg        %[r5],    %[r5]                         \n\t"
  83       "sll        %[r5],    %[r5],    12                  \n\t"
  84       "addu       %[r5],    %[r5],    %[r1]               \n\t"
  85       "subu       %[r7],    %[r5],    %[const_11rsh12]    \n\t"
  86       "mul        %[r7],    %[r7],    %[const_178]        \n\t"
  87       "sra        %[r7],    %[r7],    8                   \n\t"
  88       "addu       %[r7],    %[r7],    %[r2]               \n\t"
  89       "sra        %[r7],    %[r7],    1                   \n\t"
  90       "subu       %[r2],    %[r2],    %[r7]               \n\t"
  91       "addu       %[r2],    %[r2],    %[r0]               \n\t"
  92       ".set       pop                                     \n\t"
  93       : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
  94         [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
  95         [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8)
  96       : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43),
  97         [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12),
  98         [const_178] "r" (const_178)
  99       : "hi", "lo"
 100     );
 101     inst->logLrtTimeAvgW32[i] = r2;
 102     logLrtTimeAvgKsumFX += r2;
 103   }
 104
 105   inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5,
 106                                               inst->stages + 10);
 107                                                   // 5 = BIN_SIZE_LRT / 2
 108   // done with computation of LR factor
 109
 110   //
 111   // compute the indicator functions
 112   //
 113
 114   // average LRT feature
 115   // FLOAT code
 116   // indicator0 = 0.5 * (tanh(widthPrior *
 117   //                      (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
 118   tmpIndFX = 16384; // Q14(1.0)
 119   tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
 120   nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
 121   //use larger width in tanh map for pause regions
 122   if (tmp32no1 < 0) {
 123     tmpIndFX = 0;
 124     tmp32no1 = -tmp32no1;
 125     //widthPrior = widthPrior * 2.0;
 126     nShifts++;
 127   }
 128   tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
 129   // compute indicator function: sigmoid map
 130   tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
 131   if ((tableIndex < 16) && (tableIndex >= 0)) {
 132     tmp16no2 = kIndicatorTable[tableIndex];
 133     tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
 134     frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
 135     tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
 136     if (tmpIndFX == 0) {
 137       tmpIndFX = 8192 - tmp16no2; // Q14
 138     } else {
 139       tmpIndFX = 8192 + tmp16no2; // Q14
 140     }
 141   }
 142   indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14
 143
 144   //spectral flatness feature
 145   if (inst->weightSpecFlat) {
 146     tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
 147     tmpIndFX = 16384; // Q14(1.0)
 148     //use larger width in tanh map for pause regions
 149     tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
 150     nShifts = 4;
 151     if (inst->thresholdSpecFlat < tmpU32no1) {
 152       tmpIndFX = 0;
 153       tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
 154       //widthPrior = widthPrior * 2.0;
 155       nShifts++;
 156     }
 157     tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2,
 158                                                                   nShifts), 25);
 159                                                      //Q14
 160     tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts),
 161                                     25); //Q14
 162     // compute indicator function: sigmoid map
 163     // FLOAT code
 164     // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
 165     //                          (threshPrior1 - tmpFloat1)) + 1.0);
 166     tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
 167     if (tableIndex < 16) {
 168       tmp16no2 = kIndicatorTable[tableIndex];
 169       tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
 170       frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
 171       tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
 172       if (tmpIndFX) {
 173         tmpIndFX = 8192 + tmp16no2; // Q14
 174       } else {
 175         tmpIndFX = 8192 - tmp16no2; // Q14
 176       }
 177     }
 178     indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
 179   }
 180
 181   //for template spectral-difference
 182   if (inst->weightSpecDiff) {
 183     tmpU32no1 = 0;
 184     if (inst->featureSpecDiff) {
 185       normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
 186                                WebRtcSpl_NormU32(inst->featureSpecDiff));
 187       tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp);
 188                                                          // Q(normTmp-2*stages)
 189       tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy,
 190                                         20 - inst->stages - normTmp);
 191       if (tmpU32no2 > 0) {
 192         // Q(20 - inst->stages)
 193         tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2);
 194       } else {
 195         tmpU32no1 = (uint32_t)(0x7fffffff);
 196       }
 197     }
 198     tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff,
 199                                                       17),
 200                                 25);
 201     tmpU32no2 = tmpU32no1 - tmpU32no3;
 202     nShifts = 1;
 203     tmpIndFX = 16384; // Q14(1.0)
 204     //use larger width in tanh map for pause regions
 205     if (tmpU32no2 & 0x80000000) {
 206       tmpIndFX = 0;
 207       tmpU32no2 = tmpU32no3 - tmpU32no1;
 208       //widthPrior = widthPrior * 2.0;
 209       nShifts--;
 210     }
 211     tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
 212     // compute indicator function: sigmoid map
 213     /* FLOAT code
 214      indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
 215      */
 216     tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
 217     if (tableIndex < 16) {
 218       tmp16no2 = kIndicatorTable[tableIndex];
 219       tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
 220       frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
 221       tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
 222                     tmp16no1, frac, 14);
 223       if (tmpIndFX) {
 224         tmpIndFX = 8192 + tmp16no2;
 225       } else {
 226         tmpIndFX = 8192 - tmp16no2;
 227       }
 228     }
 229     indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
 230   }
 231
 232   //combine the indicator function with the feature weights
 233   // FLOAT code
 234   // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
 235   //                 indicator1 + weightIndPrior2 * indicator2);
 236   indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
 237   // done with computing indicator function
 238
 239   //compute the prior probability
 240   // FLOAT code
 241   // inst->priorNonSpeechProb += PRIOR_UPDATE *
 242   //                             (indPriorNonSpeech - inst->priorNonSpeechProb);
 243   tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
 244   inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
 245                                 PRIOR_UPDATE_Q14, tmp16, 14); // Q14
 246
 247   //final speech probability: combine prior model with LR factor:
 248
 249   memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
 250
 251   if (inst->priorNonSpeechProb > 0) {
 252     r0 = inst->priorNonSpeechProb;
 253     r1 = 16384 - r0;
 254     int32_t const_23637 = 23637;
 255     int32_t const_44 = 44;
 256     int32_t const_84 = 84;
 257     int32_t const_1 = 1;
 258     int32_t const_neg8 = -8;
 259     for (i = 0; i < inst->magnLen; i++) {
 260       r2 = inst->logLrtTimeAvgW32[i];
 261       if (r2 < 65300) {
 262         __asm __volatile(
 263           ".set         push                                      \n\t"
 264           ".set         noreorder                                 \n\t"
 265           "mul          %[r2],    %[r2],          %[const_23637]  \n\t"
 266           "sll          %[r6],    %[r1],          16              \n\t"
 267           "clz          %[r7],    %[r6]                           \n\t"
 268           "clo          %[r8],    %[r6]                           \n\t"
 269           "slt          %[r9],    %[r6],          $0              \n\t"
 270           "movn         %[r7],    %[r8],          %[r9]           \n\t"
 271           "sra          %[r2],    %[r2],          14              \n\t"
 272           "andi         %[r3],    %[r2],          0xfff           \n\t"
 273           "mul          %[r4],    %[r3],          %[r3]           \n\t"
 274           "mul          %[r3],    %[r3],          %[const_84]     \n\t"
 275           "sra          %[r2],    %[r2],          12              \n\t"
 276           "slt          %[r5],    %[r2],          %[const_neg8]   \n\t"
 277           "movn         %[r2],    %[const_neg8],  %[r5]           \n\t"
 278           "mul          %[r4],    %[r4],          %[const_44]     \n\t"
 279           "sra          %[r3],    %[r3],          7               \n\t"
 280           "addiu        %[r7],    %[r7],          -1              \n\t"
 281           "slti         %[r9],    %[r7],          31              \n\t"
 282           "movz         %[r7],    $0,             %[r9]           \n\t"
 283           "sra          %[r4],    %[r4],          19              \n\t"
 284           "addu         %[r4],    %[r4],          %[r3]           \n\t"
 285           "addiu        %[r3],    %[r2],          8               \n\t"
 286           "addiu        %[r2],    %[r2],          -4              \n\t"
 287           "neg          %[r5],    %[r2]                           \n\t"
 288           "sllv         %[r6],    %[r4],          %[r2]           \n\t"
 289           "srav         %[r5],    %[r4],          %[r5]           \n\t"
 290           "slt          %[r2],    %[r2],          $0              \n\t"
 291           "movn         %[r6],    %[r5],          %[r2]           \n\t"
 292           "sllv         %[r3],    %[const_1],     %[r3]           \n\t"
 293           "addu         %[r2],    %[r3],          %[r6]           \n\t"
 294           "clz          %[r4],    %[r2]                           \n\t"
 295           "clo          %[r5],    %[r2]                           \n\t"
 296           "slt          %[r8],    %[r2],          $0              \n\t"
 297           "movn         %[r4],    %[r5],          %[r8]           \n\t"
 298           "addiu        %[r4],    %[r4],          -1              \n\t"
 299           "slt          %[r5],    $0,             %[r2]           \n\t"
 300           "or           %[r5],    %[r5],          %[r7]           \n\t"
 301           "movz         %[r4],    $0,             %[r5]           \n\t"
 302           "addiu        %[r6],    %[r7],          -7              \n\t"
 303           "addu         %[r6],    %[r6],          %[r4]           \n\t"
 304           "bltz         %[r6],    1f                              \n\t"
 305           " nop                                                   \n\t"
 306           "addiu        %[r4],    %[r6],          -8              \n\t"
 307           "neg          %[r3],    %[r4]                           \n\t"
 308           "srav         %[r5],    %[r2],          %[r3]           \n\t"
 309           "mul          %[r5],    %[r5],          %[r1]           \n\t"
 310           "mul          %[r2],    %[r2],          %[r1]           \n\t"
 311           "slt          %[r4],    %[r4],          $0              \n\t"
 312           "srav         %[r5],    %[r5],          %[r6]           \n\t"
 313           "sra          %[r2],    %[r2],          8               \n\t"
 314           "movn         %[r2],    %[r5],          %[r4]           \n\t"
 315           "sll          %[r3],    %[r0],          8               \n\t"
 316           "addu         %[r2],    %[r0],          %[r2]           \n\t"
 317           "divu         %[r3],    %[r3],          %[r2]           \n\t"
 318          "1:                                                      \n\t"
 319           ".set         pop                                       \n\t"
 320           : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4),
 321             [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
 322             [r8] "=&r" (r8), [r9] "=&r" (r9)
 323           : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637),
 324             [const_neg8] "r" (const_neg8), [const_84] "r" (const_84),
 325             [const_1] "r" (const_1), [const_44] "r" (const_44)
 326           : "hi", "lo"
 327         );
 328         nonSpeechProbFinal[i] = r3;
 329       }
 330     }
 331   }
 332 }
 333
 334 // Update analysis buffer for lower band, and window data before FFT.
 335 void WebRtcNsx_AnalysisUpdate_mips(NsxInst_t* inst,
 336                                    int16_t* out,
 337                                    int16_t* new_speech) {
 338
 339   int iters, after;
 340   int anaLen = inst->anaLen;
 341   int *window = (int*)inst->window;
 342   int *anaBuf = (int*)inst->analysisBuffer;
 343   int *outBuf = (int*)out;
 344   int r0, r1, r2, r3, r4, r5, r6, r7;
 345 #if defined(MIPS_DSP_R1_LE)
 346   int r8;
 347 #endif
 348
 349   // For lower band update analysis buffer.
 350   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer,
 351                         inst->analysisBuffer + inst->blockLen10ms,
 352                         inst->anaLen - inst->blockLen10ms);
 353   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer
 354       + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms);
 355
 356   // Window data before FFT.
 357 #if defined(MIPS_DSP_R1_LE)
 358   __asm __volatile(
 359     ".set              push                                \n\t"
 360     ".set              noreorder                           \n\t"
 361     "sra               %[iters],   %[anaLen],    3         \n\t"
 362    "1:                                                     \n\t"
 363     "blez              %[iters],   2f                      \n\t"
 364     " nop                                                  \n\t"
 365     "lw                %[r0],      0(%[window])            \n\t"
 366     "lw                %[r1],      0(%[anaBuf])            \n\t"
 367     "lw                %[r2],      4(%[window])            \n\t"
 368     "lw                %[r3],      4(%[anaBuf])            \n\t"
 369     "lw                %[r4],      8(%[window])            \n\t"
 370     "lw                %[r5],      8(%[anaBuf])            \n\t"
 371     "lw                %[r6],      12(%[window])           \n\t"
 372     "lw                %[r7],      12(%[anaBuf])           \n\t"
 373     "muleq_s.w.phl     %[r8],      %[r0],        %[r1]     \n\t"
 374     "muleq_s.w.phr     %[r0],      %[r0],        %[r1]     \n\t"
 375     "muleq_s.w.phl     %[r1],      %[r2],        %[r3]     \n\t"
 376     "muleq_s.w.phr     %[r2],      %[r2],        %[r3]     \n\t"
 377     "muleq_s.w.phl     %[r3],      %[r4],        %[r5]     \n\t"
 378     "muleq_s.w.phr     %[r4],      %[r4],        %[r5]     \n\t"
 379     "muleq_s.w.phl     %[r5],      %[r6],        %[r7]     \n\t"
 380     "muleq_s.w.phr     %[r6],      %[r6],        %[r7]     \n\t"
 381 #if defined(MIPS_DSP_R2_LE)
 382     "precr_sra_r.ph.w  %[r8],      %[r0],        15        \n\t"
 383     "precr_sra_r.ph.w  %[r1],      %[r2],        15        \n\t"
 384     "precr_sra_r.ph.w  %[r3],      %[r4],        15        \n\t"
 385     "precr_sra_r.ph.w  %[r5],      %[r6],        15        \n\t"
 386     "sw                %[r8],      0(%[outBuf])            \n\t"
 387     "sw                %[r1],      4(%[outBuf])            \n\t"
 388     "sw                %[r3],      8(%[outBuf])            \n\t"
 389     "sw                %[r5],      12(%[outBuf])           \n\t"
 390 #else
 391     "shra_r.w          %[r8],      %[r8],        15        \n\t"
 392     "shra_r.w          %[r0],      %[r0],        15        \n\t"
 393     "shra_r.w          %[r1],      %[r1],        15        \n\t"
 394     "shra_r.w          %[r2],      %[r2],        15        \n\t"
 395     "shra_r.w          %[r3],      %[r3],        15        \n\t"
 396     "shra_r.w          %[r4],      %[r4],        15        \n\t"
 397     "shra_r.w          %[r5],      %[r5],        15        \n\t"
 398     "shra_r.w          %[r6],      %[r6],        15        \n\t"
 399     "sll               %[r0],      %[r0],        16        \n\t"
 400     "sll               %[r2],      %[r2],        16        \n\t"
 401     "sll               %[r4],      %[r4],        16        \n\t"
 402     "sll               %[r6],      %[r6],        16        \n\t"
 403     "packrl.ph         %[r0],      %[r8],        %[r0]     \n\t"
 404     "packrl.ph         %[r2],      %[r1],        %[r2]     \n\t"
 405     "packrl.ph         %[r4],      %[r3],        %[r4]     \n\t"
 406     "packrl.ph         %[r6],      %[r5],        %[r6]     \n\t"
 407     "sw                %[r0],      0(%[outBuf])            \n\t"
 408     "sw                %[r2],      4(%[outBuf])            \n\t"
 409     "sw                %[r4],      8(%[outBuf])            \n\t"
 410     "sw                %[r6],      12(%[outBuf])           \n\t"
 411 #endif
 412     "addiu             %[window],  %[window],    16        \n\t"
 413     "addiu             %[anaBuf],  %[anaBuf],    16        \n\t"
 414     "addiu             %[outBuf],  %[outBuf],    16        \n\t"
 415     "b                 1b                                  \n\t"
 416     " addiu            %[iters],   %[iters],     -1        \n\t"
 417    "2:                                                     \n\t"
 418     "andi              %[after],   %[anaLen],    7         \n\t"
 419    "3:                                                     \n\t"
 420     "blez              %[after],   4f                      \n\t"
 421     " nop                                                  \n\t"
 422     "lh                %[r0],      0(%[window])            \n\t"
 423     "lh                %[r1],      0(%[anaBuf])            \n\t"
 424     "mul               %[r0],      %[r0],        %[r1]     \n\t"
 425     "addiu             %[window],  %[window],    2         \n\t"
 426     "addiu             %[anaBuf],  %[anaBuf],    2         \n\t"
 427     "addiu             %[outBuf],  %[outBuf],    2         \n\t"
 428     "shra_r.w          %[r0],      %[r0],        14        \n\t"
 429     "sh                %[r0],      -2(%[outBuf])           \n\t"
 430     "b                 3b                                  \n\t"
 431     " addiu            %[after],   %[after],     -1        \n\t"
 432    "4:                                                     \n\t"
 433     ".set              pop                                 \n\t"
 434     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
 435       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
 436       [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8),
 437       [iters] "=&r" (iters), [after] "=&r" (after),
 438       [window] "+r" (window),[anaBuf] "+r" (anaBuf),
 439       [outBuf] "+r" (outBuf)
 440     : [anaLen] "r" (anaLen)
 441     : "memory", "hi", "lo"
 442   );
 443 #else
 444   __asm  __volatile(
 445     ".set           push                                    \n\t"
 446     ".set           noreorder                               \n\t"
 447     "sra            %[iters],   %[anaLen],      2           \n\t"
 448    "1:                                                      \n\t"
 449     "blez           %[iters],   2f                          \n\t"
 450     " nop                                                   \n\t"
 451     "lh             %[r0],      0(%[window])                \n\t"
 452     "lh             %[r1],      0(%[anaBuf])                \n\t"
 453     "lh             %[r2],      2(%[window])                \n\t"
 454     "lh             %[r3],      2(%[anaBuf])                \n\t"
 455     "lh             %[r4],      4(%[window])                \n\t"
 456     "lh             %[r5],      4(%[anaBuf])                \n\t"
 457     "lh             %[r6],      6(%[window])                \n\t"
 458     "lh             %[r7],      6(%[anaBuf])                \n\t"
 459     "mul            %[r0],      %[r0],          %[r1]       \n\t"
 460     "mul            %[r2],      %[r2],          %[r3]       \n\t"
 461     "mul            %[r4],      %[r4],          %[r5]       \n\t"
 462     "mul            %[r6],      %[r6],          %[r7]       \n\t"
 463     "addiu          %[window],  %[window],      8           \n\t"
 464     "addiu          %[anaBuf],  %[anaBuf],      8           \n\t"
 465     "addiu          %[r0],      %[r0],          0x2000      \n\t"
 466     "addiu          %[r2],      %[r2],          0x2000      \n\t"
 467     "addiu          %[r4],      %[r4],          0x2000      \n\t"
 468     "addiu          %[r6],      %[r6],          0x2000      \n\t"
 469     "sra            %[r0],      %[r0],          14          \n\t"
 470     "sra            %[r2],      %[r2],          14          \n\t"
 471     "sra            %[r4],      %[r4],          14          \n\t"
 472     "sra            %[r6],      %[r6],          14          \n\t"
 473     "sh             %[r0],      0(%[outBuf])                \n\t"
 474     "sh             %[r2],      2(%[outBuf])                \n\t"
 475     "sh             %[r4],      4(%[outBuf])                \n\t"
 476     "sh             %[r6],      6(%[outBuf])                \n\t"
 477     "addiu          %[outBuf],  %[outBuf],      8           \n\t"
 478     "b              1b                                      \n\t"
 479     " addiu         %[iters],   %[iters],       -1          \n\t"
 480    "2:                                                      \n\t"
 481     "andi           %[after],   %[anaLen],      3           \n\t"
 482    "3:                                                      \n\t"
 483     "blez           %[after],   4f                          \n\t"
 484     " nop                                                   \n\t"
 485     "lh             %[r0],      0(%[window])                \n\t"
 486     "lh             %[r1],      0(%[anaBuf])                \n\t"
 487     "mul            %[r0],      %[r0],          %[r1]       \n\t"
 488     "addiu          %[window],  %[window],      2           \n\t"
 489     "addiu          %[anaBuf],  %[anaBuf],      2           \n\t"
 490     "addiu          %[outBuf],  %[outBuf],      2           \n\t"
 491     "addiu          %[r0],      %[r0],          0x2000      \n\t"
 492     "sra            %[r0],      %[r0],          14          \n\t"
 493     "sh             %[r0],      -2(%[outBuf])               \n\t"
 494     "b              3b                                      \n\t"
 495     " addiu         %[after],   %[after],       -1          \n\t"
 496    "4:                                                      \n\t"
 497     ".set           pop                                     \n\t"
 498     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
 499       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
 500       [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters),
 501       [after] "=&r" (after), [window] "+r" (window),
 502       [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf)
 503     : [anaLen] "r" (anaLen)
 504     : "memory", "hi", "lo"
 505   );
 506 #endif
 507 }
 508
 509 // For the noise suppression process, synthesis, read out fully processed
 510 // segment, and update synthesis buffer.
 511 void WebRtcNsx_SynthesisUpdate_mips(NsxInst_t* inst,
 512                                     int16_t* out_frame,
 513                                     int16_t gain_factor) {
 514
 515   int iters = inst->blockLen10ms >> 2;
 516   int after = inst->blockLen10ms & 3;
 517   int r0, r1, r2, r3, r4, r5, r6, r7;
 518   int16_t *window = (int16_t*)inst->window;
 519   int16_t *real = inst->real;
 520   int16_t *synthBuf = inst->synthesisBuffer;
 521   int16_t *out = out_frame;
 522   int sat_pos = 0x7fff;
 523   int sat_neg = 0xffff8000;
 524   int block10 = (int)inst->blockLen10ms;
 525   int anaLen = (int)inst->anaLen;
 526
 527   __asm __volatile(
 528     ".set       push                                        \n\t"
 529     ".set       noreorder                                   \n\t"
 530    "1:                                                      \n\t"
 531     "blez       %[iters],   2f                              \n\t"
 532     " nop                                                   \n\t"
 533     "lh         %[r0],      0(%[window])                    \n\t"
 534     "lh         %[r1],      0(%[real])                      \n\t"
 535     "lh         %[r2],      2(%[window])                    \n\t"
 536     "lh         %[r3],      2(%[real])                      \n\t"
 537     "lh         %[r4],      4(%[window])                    \n\t"
 538     "lh         %[r5],      4(%[real])                      \n\t"
 539     "lh         %[r6],      6(%[window])                    \n\t"
 540     "lh         %[r7],      6(%[real])                      \n\t"
 541     "mul        %[r0],      %[r0],          %[r1]           \n\t"
 542     "mul        %[r2],      %[r2],          %[r3]           \n\t"
 543     "mul        %[r4],      %[r4],          %[r5]           \n\t"
 544     "mul        %[r6],      %[r6],          %[r7]           \n\t"
 545     "addiu      %[r0],      %[r0],          0x2000          \n\t"
 546     "addiu      %[r2],      %[r2],          0x2000          \n\t"
 547     "addiu      %[r4],      %[r4],          0x2000          \n\t"
 548     "addiu      %[r6],      %[r6],          0x2000          \n\t"
 549     "sra        %[r0],      %[r0],          14              \n\t"
 550     "sra        %[r2],      %[r2],          14              \n\t"
 551     "sra        %[r4],      %[r4],          14              \n\t"
 552     "sra        %[r6],      %[r6],          14              \n\t"
 553     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
 554     "mul        %[r2],      %[r2],          %[gain_factor]  \n\t"
 555     "mul        %[r4],      %[r4],          %[gain_factor]  \n\t"
 556     "mul        %[r6],      %[r6],          %[gain_factor]  \n\t"
 557     "addiu      %[r0],      %[r0],          0x1000          \n\t"
 558     "addiu      %[r2],      %[r2],          0x1000          \n\t"
 559     "addiu      %[r4],      %[r4],          0x1000          \n\t"
 560     "addiu      %[r6],      %[r6],          0x1000          \n\t"
 561     "sra        %[r0],      %[r0],          13              \n\t"
 562     "sra        %[r2],      %[r2],          13              \n\t"
 563     "sra        %[r4],      %[r4],          13              \n\t"
 564     "sra        %[r6],      %[r6],          13              \n\t"
 565     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 566     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
 567     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
 568     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
 569     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 570     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
 571     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
 572     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
 573     "lh         %[r1],      0(%[synthBuf])                  \n\t"
 574     "lh         %[r3],      2(%[synthBuf])                  \n\t"
 575     "lh         %[r5],      4(%[synthBuf])                  \n\t"
 576     "lh         %[r7],      6(%[synthBuf])                  \n\t"
 577     "addu       %[r0],      %[r0],          %[r1]           \n\t"
 578     "addu       %[r2],      %[r2],          %[r3]           \n\t"
 579     "addu       %[r4],      %[r4],          %[r5]           \n\t"
 580     "addu       %[r6],      %[r6],          %[r7]           \n\t"
 581     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 582     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
 583     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
 584     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
 585     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 586     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
 587     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
 588     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
 589     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
 590     "slt        %[r3],      %[r2],          %[sat_neg]      \n\t"
 591     "slt        %[r5],      %[r4],          %[sat_neg]      \n\t"
 592     "slt        %[r7],      %[r6],          %[sat_neg]      \n\t"
 593     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
 594     "movn       %[r2],      %[sat_neg],     %[r3]           \n\t"
 595     "movn       %[r4],      %[sat_neg],     %[r5]           \n\t"
 596     "movn       %[r6],      %[sat_neg],     %[r7]           \n\t"
 597     "sh         %[r0],      0(%[synthBuf])                  \n\t"
 598     "sh         %[r2],      2(%[synthBuf])                  \n\t"
 599     "sh         %[r4],      4(%[synthBuf])                  \n\t"
 600     "sh         %[r6],      6(%[synthBuf])                  \n\t"
 601     "sh         %[r0],      0(%[out])                       \n\t"
 602     "sh         %[r2],      2(%[out])                       \n\t"
 603     "sh         %[r4],      4(%[out])                       \n\t"
 604     "sh         %[r6],      6(%[out])                       \n\t"
 605     "addiu      %[window],  %[window],      8               \n\t"
 606     "addiu      %[real],    %[real],        8               \n\t"
 607     "addiu      %[synthBuf],%[synthBuf],    8               \n\t"
 608     "addiu      %[out],     %[out],         8               \n\t"
 609     "b          1b                                          \n\t"
 610     " addiu     %[iters],   %[iters],       -1              \n\t"
 611    "2:                                                      \n\t"
 612     "blez       %[after],   3f                              \n\t"
 613     " subu      %[block10], %[anaLen],      %[block10]      \n\t"
 614     "lh         %[r0],      0(%[window])                    \n\t"
 615     "lh         %[r1],      0(%[real])                      \n\t"
 616     "mul        %[r0],      %[r0],          %[r1]           \n\t"
 617     "addiu      %[window],  %[window],      2               \n\t"
 618     "addiu      %[real],    %[real],        2               \n\t"
 619     "addiu      %[r0],      %[r0],          0x2000          \n\t"
 620     "sra        %[r0],      %[r0],          14              \n\t"
 621     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
 622     "addiu      %[r0],      %[r0],          0x1000          \n\t"
 623     "sra        %[r0],      %[r0],          13              \n\t"
 624     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 625     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 626     "lh         %[r1],      0(%[synthBuf])                  \n\t"
 627     "addu       %[r0],      %[r0],          %[r1]           \n\t"
 628     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 629     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 630     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
 631     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
 632     "sh         %[r0],      0(%[synthBuf])                  \n\t"
 633     "sh         %[r0],      0(%[out])                       \n\t"
 634     "addiu      %[synthBuf],%[synthBuf],    2               \n\t"
 635     "addiu      %[out],     %[out],         2               \n\t"
 636     "b          2b                                          \n\t"
 637     " addiu     %[after],   %[after],       -1              \n\t"
 638    "3:                                                      \n\t"
 639     "sra        %[iters],   %[block10],     2               \n\t"
 640    "4:                                                      \n\t"
 641     "blez       %[iters],   5f                              \n\t"
 642     " andi      %[after],   %[block10],     3               \n\t"
 643     "lh         %[r0],      0(%[window])                    \n\t"
 644     "lh         %[r1],      0(%[real])                      \n\t"
 645     "lh         %[r2],      2(%[window])                    \n\t"
 646     "lh         %[r3],      2(%[real])                      \n\t"
 647     "lh         %[r4],      4(%[window])                    \n\t"
 648     "lh         %[r5],      4(%[real])                      \n\t"
 649     "lh         %[r6],      6(%[window])                    \n\t"
 650     "lh         %[r7],      6(%[real])                      \n\t"
 651     "mul        %[r0],      %[r0],          %[r1]           \n\t"
 652     "mul        %[r2],      %[r2],          %[r3]           \n\t"
 653     "mul        %[r4],      %[r4],          %[r5]           \n\t"
 654     "mul        %[r6],      %[r6],          %[r7]           \n\t"
 655     "addiu      %[r0],      %[r0],          0x2000          \n\t"
 656     "addiu      %[r2],      %[r2],          0x2000          \n\t"
 657     "addiu      %[r4],      %[r4],          0x2000          \n\t"
 658     "addiu      %[r6],      %[r6],          0x2000          \n\t"
 659     "sra        %[r0],      %[r0],          14              \n\t"
 660     "sra        %[r2],      %[r2],          14              \n\t"
 661     "sra        %[r4],      %[r4],          14              \n\t"
 662     "sra        %[r6],      %[r6],          14              \n\t"
 663     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
 664     "mul        %[r2],      %[r2],          %[gain_factor]  \n\t"
 665     "mul        %[r4],      %[r4],          %[gain_factor]  \n\t"
 666     "mul        %[r6],      %[r6],          %[gain_factor]  \n\t"
 667     "addiu      %[r0],      %[r0],          0x1000          \n\t"
 668     "addiu      %[r2],      %[r2],          0x1000          \n\t"
 669     "addiu      %[r4],      %[r4],          0x1000          \n\t"
 670     "addiu      %[r6],      %[r6],          0x1000          \n\t"
 671     "sra        %[r0],      %[r0],          13              \n\t"
 672     "sra        %[r2],      %[r2],          13              \n\t"
 673     "sra        %[r4],      %[r4],          13              \n\t"
 674     "sra        %[r6],      %[r6],          13              \n\t"
 675     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 676     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
 677     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
 678     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
 679     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 680     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
 681     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
 682     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
 683     "lh         %[r1],      0(%[synthBuf])                  \n\t"
 684     "lh         %[r3],      2(%[synthBuf])                  \n\t"
 685     "lh         %[r5],      4(%[synthBuf])                  \n\t"
 686     "lh         %[r7],      6(%[synthBuf])                  \n\t"
 687     "addu       %[r0],      %[r0],          %[r1]           \n\t"
 688     "addu       %[r2],      %[r2],          %[r3]           \n\t"
 689     "addu       %[r4],      %[r4],          %[r5]           \n\t"
 690     "addu       %[r6],      %[r6],          %[r7]           \n\t"
 691     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 692     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
 693     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
 694     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
 695     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 696     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
 697     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
 698     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
 699     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
 700     "slt        %[r3],      %[r2],          %[sat_neg]      \n\t"
 701     "slt        %[r5],      %[r4],          %[sat_neg]      \n\t"
 702     "slt        %[r7],      %[r6],          %[sat_neg]      \n\t"
 703     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
 704     "movn       %[r2],      %[sat_neg],     %[r3]           \n\t"
 705     "movn       %[r4],      %[sat_neg],     %[r5]           \n\t"
 706     "movn       %[r6],      %[sat_neg],     %[r7]           \n\t"
 707     "sh         %[r0],      0(%[synthBuf])                  \n\t"
 708     "sh         %[r2],      2(%[synthBuf])                  \n\t"
 709     "sh         %[r4],      4(%[synthBuf])                  \n\t"
 710     "sh         %[r6],      6(%[synthBuf])                  \n\t"
 711     "addiu      %[window],  %[window],      8               \n\t"
 712     "addiu      %[real],    %[real],        8               \n\t"
 713     "addiu      %[synthBuf],%[synthBuf],    8               \n\t"
 714     "b          4b                                          \n\t"
 715     " addiu     %[iters],   %[iters],       -1              \n\t"
 716    "5:                                                      \n\t"
 717     "blez       %[after],   6f                              \n\t"
 718     " nop                                                   \n\t"
 719     "lh         %[r0],      0(%[window])                    \n\t"
 720     "lh         %[r1],      0(%[real])                      \n\t"
 721     "mul        %[r0],      %[r0],          %[r1]           \n\t"
 722     "addiu      %[window],  %[window],      2               \n\t"
 723     "addiu      %[real],    %[real],        2               \n\t"
 724     "addiu      %[r0],      %[r0],          0x2000          \n\t"
 725     "sra        %[r0],      %[r0],          14              \n\t"
 726     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
 727     "addiu      %[r0],      %[r0],          0x1000          \n\t"
 728     "sra        %[r0],      %[r0],          13              \n\t"
 729     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 730     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 731     "lh         %[r1],      0(%[synthBuf])                  \n\t"
 732     "addu       %[r0],      %[r0],          %[r1]           \n\t"
 733     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 734     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 735     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
 736     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
 737     "sh         %[r0],      0(%[synthBuf])                  \n\t"
 738     "addiu      %[synthBuf],%[synthBuf],    2               \n\t"
 739     "b          2b                                          \n\t"
 740     " addiu     %[after],   %[after],       -1              \n\t"
 741    "6:                                                      \n\t"
 742     ".set       pop                                         \n\t"
 743     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
 744       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
 745       [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters),
 746       [after] "+r" (after), [block10] "+r" (block10),
 747       [window] "+r" (window), [real] "+r" (real),
 748       [synthBuf] "+r" (synthBuf), [out] "+r" (out)
 749     : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos),
 750       [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen)
 751     : "memory", "hi", "lo"
 752   );
 753
 754   // update synthesis buffer
 755   WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer,
 756                         inst->synthesisBuffer + inst->blockLen10ms,
 757                         inst->anaLen - inst->blockLen10ms);
 758   WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
 759       + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
 760 }
 761
 762 // Filter the data in the frequency domain, and create spectrum.
 763 void WebRtcNsx_PrepareSpectrum_mips(NsxInst_t* inst, int16_t* freq_buf) {
 764
 765   uint16_t *noiseSupFilter = inst->noiseSupFilter;
 766   int16_t *real = inst->real;
 767   int16_t *imag = inst->imag;
 768   int32_t loop_count = 2;
 769   int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6;
 770   int16_t tmp16 = (inst->anaLen << 1) - 4;
 771   int16_t* freq_buf_f = freq_buf;
 772   int16_t* freq_buf_s = &freq_buf[tmp16];
 773
 774   __asm __volatile (
 775     ".set       push                                                 \n\t"
 776     ".set       noreorder                                            \n\t"
 777     //first sample
 778     "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
 779     "lh         %[tmp_2],           0(%[real])                       \n\t"
 780     "lh         %[tmp_3],           0(%[imag])                       \n\t"
 781     "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
 782     "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
 783     "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
 784     "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
 785     "sh         %[tmp_2],           0(%[real])                       \n\t"
 786     "sh         %[tmp_3],           0(%[imag])                       \n\t"
 787     "negu       %[tmp_3],           %[tmp_3]                         \n\t"
 788     "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
 789     "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
 790     "addiu      %[real],            %[real],              2          \n\t"
 791     "addiu      %[imag],            %[imag],              2          \n\t"
 792     "addiu      %[noiseSupFilter],  %[noiseSupFilter],    2          \n\t"
 793     "addiu      %[freq_buf_f],      %[freq_buf_f],        4          \n\t"
 794    "1:                                                               \n\t"
 795     "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
 796     "lh         %[tmp_2],           0(%[real])                       \n\t"
 797     "lh         %[tmp_3],           0(%[imag])                       \n\t"
 798     "lh         %[tmp_4],           2(%[noiseSupFilter])             \n\t"
 799     "lh         %[tmp_5],           2(%[real])                       \n\t"
 800     "lh         %[tmp_6],           2(%[imag])                       \n\t"
 801     "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
 802     "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
 803     "mul        %[tmp_5],           %[tmp_5],             %[tmp_4]   \n\t"
 804     "mul        %[tmp_6],           %[tmp_6],             %[tmp_4]   \n\t"
 805     "addiu      %[loop_count],      %[loop_count],        2          \n\t"
 806     "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
 807     "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
 808     "sra        %[tmp_5],           %[tmp_5],             14         \n\t"
 809     "sra        %[tmp_6],           %[tmp_6],             14         \n\t"
 810     "addiu      %[noiseSupFilter],  %[noiseSupFilter],    4          \n\t"
 811     "sh         %[tmp_2],           0(%[real])                       \n\t"
 812     "sh         %[tmp_2],           4(%[freq_buf_s])                 \n\t"
 813     "sh         %[tmp_3],           0(%[imag])                       \n\t"
 814     "sh         %[tmp_3],           6(%[freq_buf_s])                 \n\t"
 815     "negu       %[tmp_3],           %[tmp_3]                         \n\t"
 816     "sh         %[tmp_5],           2(%[real])                       \n\t"
 817     "sh         %[tmp_5],           0(%[freq_buf_s])                 \n\t"
 818     "sh         %[tmp_6],           2(%[imag])                       \n\t"
 819     "sh         %[tmp_6],           2(%[freq_buf_s])                 \n\t"
 820     "negu       %[tmp_6],           %[tmp_6]                         \n\t"
 821     "addiu      %[freq_buf_s],      %[freq_buf_s],        -8         \n\t"
 822     "addiu      %[real],            %[real],              4          \n\t"
 823     "addiu      %[imag],            %[imag],              4          \n\t"
 824     "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
 825     "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
 826     "sh         %[tmp_5],           4(%[freq_buf_f])                 \n\t"
 827     "sh         %[tmp_6],           6(%[freq_buf_f])                 \n\t"
 828     "blt        %[loop_count],      %[loop_size],         1b         \n\t"
 829     " addiu     %[freq_buf_f],      %[freq_buf_f],        8          \n\t"
 830     //last two samples:
 831     "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
 832     "lh         %[tmp_2],           0(%[real])                       \n\t"
 833     "lh         %[tmp_3],           0(%[imag])                       \n\t"
 834     "lh         %[tmp_4],           2(%[noiseSupFilter])             \n\t"
 835     "lh         %[tmp_5],           2(%[real])                       \n\t"
 836     "lh         %[tmp_6],           2(%[imag])                       \n\t"
 837     "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
 838     "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
 839     "mul        %[tmp_5],           %[tmp_5],             %[tmp_4]   \n\t"
 840     "mul        %[tmp_6],           %[tmp_6],             %[tmp_4]   \n\t"
 841     "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
 842     "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
 843     "sra        %[tmp_5],           %[tmp_5],             14         \n\t"
 844     "sra        %[tmp_6],           %[tmp_6],             14         \n\t"
 845     "sh         %[tmp_2],           0(%[real])                       \n\t"
 846     "sh         %[tmp_2],           4(%[freq_buf_s])                 \n\t"
 847     "sh         %[tmp_3],           0(%[imag])                       \n\t"
 848     "sh         %[tmp_3],           6(%[freq_buf_s])                 \n\t"
 849     "negu       %[tmp_3],           %[tmp_3]                         \n\t"
 850     "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
 851     "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
 852     "sh         %[tmp_5],           4(%[freq_buf_f])                 \n\t"
 853     "sh         %[tmp_6],           6(%[freq_buf_f])                 \n\t"
 854     "sh         %[tmp_5],           2(%[real])                       \n\t"
 855     "sh         %[tmp_6],           2(%[imag])                       \n\t"
 856     ".set       pop                                                  \n\t"
 857     : [real] "+r" (real), [imag] "+r" (imag),
 858       [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s),
 859       [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter),
 860       [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3),
 861       [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6)
 862     : [loop_size] "r" (inst->anaLen2)
 863     : "memory", "hi", "lo"
 864   );
 865 }
 866
 867 #if defined(MIPS_DSP_R1_LE)
 868 // Denormalize the real-valued signal |in|, the output from inverse FFT.
 869 void WebRtcNsx_Denormalize_mips(NsxInst_t* inst, int16_t* in, int factor) {
 870   int32_t r0, r1, r2, r3, t0;
 871   int len = inst->anaLen;
 872   int16_t *out = &inst->real[0];
 873   int shift = factor - inst->normData;
 874
 875   __asm __volatile (
 876     ".set          push                                \n\t"
 877     ".set          noreorder                           \n\t"
 878     "beqz          %[len],     8f                      \n\t"
 879     " nop                                              \n\t"
 880     "bltz          %[shift],   4f                      \n\t"
 881     " sra          %[t0],      %[len],      2          \n\t"
 882     "beqz          %[t0],      2f                      \n\t"
 883     " andi         %[len],     %[len],      3          \n\t"
 884    "1:                                                 \n\t"
 885     "lh            %[r0],      0(%[in])                \n\t"
 886     "lh            %[r1],      2(%[in])                \n\t"
 887     "lh            %[r2],      4(%[in])                \n\t"
 888     "lh            %[r3],      6(%[in])                \n\t"
 889     "shllv_s.ph    %[r0],      %[r0],       %[shift]   \n\t"
 890     "shllv_s.ph    %[r1],      %[r1],       %[shift]   \n\t"
 891     "shllv_s.ph    %[r2],      %[r2],       %[shift]   \n\t"
 892     "shllv_s.ph    %[r3],      %[r3],       %[shift]   \n\t"
 893     "addiu         %[in],      %[in],       8          \n\t"
 894     "addiu         %[t0],      %[t0],       -1         \n\t"
 895     "sh            %[r0],      0(%[out])               \n\t"
 896     "sh            %[r1],      2(%[out])               \n\t"
 897     "sh            %[r2],      4(%[out])               \n\t"
 898     "sh            %[r3],      6(%[out])               \n\t"
 899     "bgtz          %[t0],      1b                      \n\t"
 900     " addiu        %[out],     %[out],      8          \n\t"
 901    "2:                                                 \n\t"
 902     "beqz          %[len],     8f                      \n\t"
 903     " nop                                              \n\t"
 904    "3:                                                 \n\t"
 905     "lh            %[r0],      0(%[in])                \n\t"
 906     "addiu         %[in],      %[in],       2          \n\t"
 907     "addiu         %[len],     %[len],      -1         \n\t"
 908     "shllv_s.ph    %[r0],      %[r0],       %[shift]   \n\t"
 909     "addiu         %[out],     %[out],      2          \n\t"
 910     "bgtz          %[len],     3b                      \n\t"
 911     " sh           %[r0],      -2(%[out])              \n\t"
 912     "b             8f                                  \n\t"
 913    "4:                                                 \n\t"
 914     "negu          %[shift],   %[shift]                \n\t"
 915     "beqz          %[t0],      6f                      \n\t"
 916     " andi         %[len],     %[len],      3          \n\t"
 917    "5:                                                 \n\t"
 918     "lh            %[r0],      0(%[in])                \n\t"
 919     "lh            %[r1],      2(%[in])                \n\t"
 920     "lh            %[r2],      4(%[in])                \n\t"
 921     "lh            %[r3],      6(%[in])                \n\t"
 922     "srav          %[r0],      %[r0],       %[shift]   \n\t"
 923     "srav          %[r1],      %[r1],       %[shift]   \n\t"
 924     "srav          %[r2],      %[r2],       %[shift]   \n\t"
 925     "srav          %[r3],      %[r3],       %[shift]   \n\t"
 926     "addiu         %[in],      %[in],       8          \n\t"
 927     "addiu         %[t0],      %[t0],       -1         \n\t"
 928     "sh            %[r0],      0(%[out])               \n\t"
 929     "sh            %[r1],      2(%[out])               \n\t"
 930     "sh            %[r2],      4(%[out])               \n\t"
 931     "sh            %[r3],      6(%[out])               \n\t"
 932     "bgtz          %[t0],      5b                      \n\t"
 933     " addiu        %[out],     %[out],      8          \n\t"
 934    "6:                                                 \n\t"
 935     "beqz          %[len],     8f                      \n\t"
 936     " nop                                              \n\t"
 937    "7:                                                 \n\t"
 938     "lh            %[r0],      0(%[in])                \n\t"
 939     "addiu         %[in],      %[in],       2          \n\t"
 940     "addiu         %[len],     %[len],      -1         \n\t"
 941     "srav          %[r0],      %[r0],       %[shift]   \n\t"
 942     "addiu         %[out],     %[out],      2          \n\t"
 943     "bgtz          %[len],     7b                      \n\t"
 944     " sh           %[r0],      -2(%[out])              \n\t"
 945    "8:                                                 \n\t"
 946     ".set          pop                                 \n\t"
 947     : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
 948       [r2] "=&r" (r2), [r3] "=&r" (r3)
 949     : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
 950       [out] "r" (out)
 951     : "memory"
 952   );
 953 }
 954 #endif
 955
 956 // Normalize the real-valued signal |in|, the input to forward FFT.
 957 void WebRtcNsx_NormalizeRealBuffer_mips(NsxInst_t* inst,
 958                                         const int16_t* in,
 959                                         int16_t* out) {
 960   int32_t r0, r1, r2, r3, t0;
 961   int len = inst->anaLen;
 962   int shift = inst->normData;
 963
 964   __asm __volatile (
 965     ".set          push                                \n\t"
 966     ".set          noreorder                           \n\t"
 967     "beqz          %[len],     4f                      \n\t"
 968     " sra          %[t0],      %[len],      2          \n\t"
 969     "beqz          %[t0],      2f                      \n\t"
 970     " andi         %[len],     %[len],      3          \n\t"
 971    "1:                                                 \n\t"
 972     "lh            %[r0],      0(%[in])                \n\t"
 973     "lh            %[r1],      2(%[in])                \n\t"
 974     "lh            %[r2],      4(%[in])                \n\t"
 975     "lh            %[r3],      6(%[in])                \n\t"
 976     "sllv          %[r0],      %[r0],       %[shift]   \n\t"
 977     "sllv          %[r1],      %[r1],       %[shift]   \n\t"
 978     "sllv          %[r2],      %[r2],       %[shift]   \n\t"
 979     "sllv          %[r3],      %[r3],       %[shift]   \n\t"
 980     "addiu         %[in],      %[in],       8          \n\t"
 981     "addiu         %[t0],      %[t0],       -1         \n\t"
 982     "sh            %[r0],      0(%[out])               \n\t"
 983     "sh            %[r1],      2(%[out])               \n\t"
 984     "sh            %[r2],      4(%[out])               \n\t"
 985     "sh            %[r3],      6(%[out])               \n\t"
 986     "bgtz          %[t0],      1b                      \n\t"
 987     " addiu        %[out],     %[out],      8          \n\t"
 988    "2:                                                 \n\t"
 989     "beqz          %[len],     4f                      \n\t"
 990     " nop                                              \n\t"
 991    "3:                                                 \n\t"
 992     "lh            %[r0],      0(%[in])                \n\t"
 993     "addiu         %[in],      %[in],       2          \n\t"
 994     "addiu         %[len],     %[len],      -1         \n\t"
 995     "sllv          %[r0],      %[r0],       %[shift]   \n\t"
 996     "addiu         %[out],     %[out],      2          \n\t"
 997     "bgtz          %[len],     3b                      \n\t"
 998     " sh           %[r0],      -2(%[out])              \n\t"
 999    "4:                                                 \n\t"
1000     ".set          pop                                 \n\t"
1001     : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
1002       [r2] "=&r" (r2), [r3] "=&r" (r3)
1003     : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
1004       [out] "r" (out)
1005     : "memory"
1006   );
1007 }
1008