src/third_party/webrtc/modules/audio_processing/ns/nsx_core_mips.c

   1 /*
   2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <assert.h>
  12
  13 #include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
  14 #include "webrtc/modules/audio_processing/ns/nsx_core.h"
  15
     // Lookup table for the sigmoid (tanh) maps in WebRtcNsx_SpeechNoiseProb().
     // It is read with linear interpolation between entries tableIndex and
     // tableIndex + 1 for tableIndex in [0, 15], hence the 17 entries. The
     // interpolated value is added to or subtracted from 8192 by the caller.
     // NOTE(review): the values look like 8192 * tanh(k / 4) - TODO confirm.
  16 static const int16_t kIndicatorTable[17] = {
  17   0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
  18   7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
  19 };
  20
  21 // Compute the speech/noise probability for every frequency bin (MIPS
     // version: the two per-bin loops are written in inline assembly).
  22 // The non-speech probability is returned in: nonSpeechProbFinal
     // (inst->magnLen entries; all entries are zeroed first and an entry is
     // only written when inst->priorNonSpeechProb > 0 and the smoothed log LRT
     // of that bin is below 65300).
  23 // priorLocSnr is the prior SNR for each frequency (in Q11)
  24 // postLocSnr is the post SNR for each frequency (in Q11)
     // Side effects on inst: logLrtTimeAvgW32[], featureLogLrt and
     // priorNonSpeechProb are updated.
  25 void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
  26                                uint16_t* nonSpeechProbFinal,
  27                                uint32_t* priorLocSnr,
  28                                uint32_t* postLocSnr) {
  29
  30   uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
  31   int32_t indPriorFX, tmp32no1;
  32   int32_t logLrtTimeAvgKsumFX;
  33   int16_t indPriorFX16;
  34   int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac;
  35   int i, normTmp, nShifts;
  36
     // Scratch registers and constants handed to the inline assembly.
  37   int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
  38   int32_t const_max = 0x7fffffff;
  39   int32_t const_neg43 = -43;
  40   int32_t const_5412 = 5412;
  41   int32_t const_11rsh12 = (11 << 12);
  42   int32_t const_178 = 178;
  43
  44
  45   // compute feature based on average LR factor
  46   // this is the average over all frequencies of the smooth log LRT
  47   logLrtTimeAvgKsumFX = 0;
  48   for (i = 0; i < inst->magnLen; i++) {
  49     r0 = postLocSnr[i]; // Q11
  50     r1 = priorLocSnr[i];
  51     r2 = inst->logLrtTimeAvgW32[i];
  52
         // Assembly steps for bin i:
         //  - r3 / r5: leading-zero counts of r0 / r1, forced to 0 when the
         //    input is 0.
         //  - r6: priorLocSnr[i] shifted left by (r3 - 11), or right by
         //    (11 - r3) when r3 < 11; it is the divisor below.
         //  - r0 becomes postLocSnr[i] - (postLocSnr[i] << r3) / r6, or 0 when
         //    r6 < 1.
         //  - priorLocSnr[i] is normalized; the 12 bits below its leading one
         //    (frac) feed ((frac * frac * -43) >> 19) + ((frac * 5412) >> 12)
         //    + 37, which is added to (31 - r5) << 12. After subtracting
         //    11 << 12 the result is scaled by 178 / 256.
         //  - r2 = r2 - ((scaled log term + r2) >> 1) + r0.
         // NOTE(review): "divu" is issued before the r6 < 1 test; the
         // three-operand form is an assembler macro, so confirm it cannot
         // trap when r6 is 0.
  53     __asm __volatile(
  54       ".set       push                                    \n\t"
  55       ".set       noreorder                               \n\t"
  56       "clz        %[r3],    %[r0]                         \n\t"
  57       "clz        %[r5],    %[r1]                         \n\t"
  58       "slti       %[r4],    %[r3],    32                  \n\t"
  59       "slti       %[r6],    %[r5],    32                  \n\t"
  60       "movz       %[r3],    $0,       %[r4]               \n\t"
  61       "movz       %[r5],    $0,       %[r6]               \n\t"
  62       "slti       %[r4],    %[r3],    11                  \n\t"
  63       "addiu      %[r6],    %[r3],    -11                 \n\t"
  64       "neg        %[r7],    %[r6]                         \n\t"
  65       "sllv       %[r6],    %[r1],    %[r6]               \n\t"
  66       "srav       %[r7],    %[r1],    %[r7]               \n\t"
  67       "movn       %[r6],    %[r7],    %[r4]               \n\t"
  68       "sllv       %[r1],    %[r1],    %[r5]               \n\t"
  69       "and        %[r1],    %[r1],    %[const_max]        \n\t"
  70       "sra        %[r1],    %[r1],    19                  \n\t"
  71       "mul        %[r7],    %[r1],    %[r1]               \n\t"
  72       "sllv       %[r3],    %[r0],    %[r3]               \n\t"
  73       "divu       %[r8],    %[r3],    %[r6]               \n\t"
  74       "slti       %[r6],    %[r6],    1                   \n\t"
  75       "mul        %[r7],    %[r7],    %[const_neg43]      \n\t"
  76       "sra        %[r7],    %[r7],    19                  \n\t"
  77       "movz       %[r3],    %[r8],    %[r6]               \n\t"
  78       "subu       %[r0],    %[r0],    %[r3]               \n\t"
  79       "movn       %[r0],    $0,       %[r6]               \n\t"
  80       "mul        %[r1],    %[r1],    %[const_5412]       \n\t"
  81       "sra        %[r1],    %[r1],    12                  \n\t"
  82       "addu       %[r7],    %[r7],    %[r1]               \n\t"
  83       "addiu      %[r1],    %[r7],    37                  \n\t"
  84       "addiu      %[r5],    %[r5],    -31                 \n\t"
  85       "neg        %[r5],    %[r5]                         \n\t"
  86       "sll        %[r5],    %[r5],    12                  \n\t"
  87       "addu       %[r5],    %[r5],    %[r1]               \n\t"
  88       "subu       %[r7],    %[r5],    %[const_11rsh12]    \n\t"
  89       "mul        %[r7],    %[r7],    %[const_178]        \n\t"
  90       "sra        %[r7],    %[r7],    8                   \n\t"
  91       "addu       %[r7],    %[r7],    %[r2]               \n\t"
  92       "sra        %[r7],    %[r7],    1                   \n\t"
  93       "subu       %[r2],    %[r2],    %[r7]               \n\t"
  94       "addu       %[r2],    %[r2],    %[r0]               \n\t"
  95       ".set       pop                                     \n\t"
  96       : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
  97         [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
  98         [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8)
  99       : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43),
 100         [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12),
 101         [const_178] "r" (const_178)
 102       : "hi", "lo"
 103     );
 104     inst->logLrtTimeAvgW32[i] = r2;
 105     logLrtTimeAvgKsumFX += r2;
 106   }
 107
 108   inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5,
 109                                               inst->stages + 10);
 110                                                   // 5 = BIN_SIZE_LRT / 2
 111   // done with computation of LR factor
 112
 113   //
 114   // compute the indicator functions
 115   //
 116
 117   // average LRT feature
 118   // FLOAT code
 119   // indicator0 = 0.5 * (tanh(widthPrior *
 120   //                      (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
 121   tmpIndFX = 16384; // Q14(1.0)
 122   tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
 123   nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
 124   //use larger width in tanh map for pause regions
 125   if (tmp32no1 < 0) {
 126     tmpIndFX = 0;
 127     tmp32no1 = -tmp32no1;
 128     //widthPrior = widthPrior * 2.0;
 129     nShifts++;
 130   }
 131   tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
 132   // compute indicator function: sigmoid map
       // Outside the table range the indicator keeps its saturated value
       // (16384 or 0) chosen above.
 133   tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
 134   if ((tableIndex < 16) && (tableIndex >= 0)) {
 135     tmp16no2 = kIndicatorTable[tableIndex];
 136     tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
 137     frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
 138     tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
 139     if (tmpIndFX == 0) {
 140       tmpIndFX = 8192 - tmp16no2; // Q14
 141     } else {
 142       tmpIndFX = 8192 + tmp16no2; // Q14
 143     }
 144   }
 145   indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14
 146
 147   //spectral flatness feature
 148   if (inst->weightSpecFlat) {
 149     tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
 150     tmpIndFX = 16384; // Q14(1.0)
 151     //use larger width in tanh map for pause regions
 152     tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
 153     nShifts = 4;
 154     if (inst->thresholdSpecFlat < tmpU32no1) {
 155       tmpIndFX = 0;
 156       tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
 157       //widthPrior = widthPrior * 2.0;
 158       nShifts++;
 159     }
 160     tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25);  //Q14
 161     // compute indicator function: sigmoid map
 162     // FLOAT code
 163     // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
 164     //                          (threshPrior1 - tmpFloat1)) + 1.0);
         // tableIndex is narrowed from 32 to 16 bits and can therefore come
         // out negative; reject that too (as the LRT lookup above does) so
         // kIndicatorTable[] is never indexed below 0 and the saturated
         // indicator value is kept instead.
 165     tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
 166     if ((tableIndex < 16) && (tableIndex >= 0)) {
 167       tmp16no2 = kIndicatorTable[tableIndex];
 168       tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
 169       frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
 170       tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
 171       if (tmpIndFX) {
 172         tmpIndFX = 8192 + tmp16no2; // Q14
 173       } else {
 174         tmpIndFX = 8192 - tmp16no2; // Q14
 175       }
 176     }
 177     indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
 178   }
 179
 180   //for template spectral-difference
 181   if (inst->weightSpecDiff) {
 182     tmpU32no1 = 0;
 183     if (inst->featureSpecDiff) {
 184       normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
 185                                WebRtcSpl_NormU32(inst->featureSpecDiff));
 186       assert(normTmp >= 0);
 187       tmpU32no1 = inst->featureSpecDiff << normTmp;  // Q(normTmp-2*stages)
 188       tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy,
 189                                         20 - inst->stages - normTmp);
 190       if (tmpU32no2 > 0) {
 191         // Q(20 - inst->stages)
 192         tmpU32no1 /= tmpU32no2;
 193       } else {
 194         tmpU32no1 = (uint32_t)(0x7fffffff);
 195       }
 196     }
 197     tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
 198     tmpU32no2 = tmpU32no1 - tmpU32no3;
 199     nShifts = 1;
 200     tmpIndFX = 16384; // Q14(1.0)
 201     //use larger width in tanh map for pause regions
 202     if (tmpU32no2 & 0x80000000) {
 203       tmpIndFX = 0;
 204       tmpU32no2 = tmpU32no3 - tmpU32no1;
 205       //widthPrior = widthPrior * 2.0;
 206       nShifts--;
 207     }
 208     tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
 209     // compute indicator function: sigmoid map
 210     /* FLOAT code
 211      indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
 212      */
         // Large inputs (e.g. the 0x7fffffff fallback above) make the 16-bit
         // tableIndex negative; guard the lower bound as well so the table is
         // not read out of bounds and the saturated indicator value is kept.
 213     tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
 214     if ((tableIndex < 16) && (tableIndex >= 0)) {
 215       tmp16no2 = kIndicatorTable[tableIndex];
 216       tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
 217       frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
 218       tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
 219                     tmp16no1, frac, 14);
 220       if (tmpIndFX) {
 221         tmpIndFX = 8192 + tmp16no2;
 222       } else {
 223         tmpIndFX = 8192 - tmp16no2;
 224       }
 225     }
 226     indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
 227   }
 228
 229   //combine the indicator function with the feature weights
 230   // FLOAT code
 231   // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
 232   //                 indicator1 + weightIndPrior2 * indicator2);
 233   indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
 234   // done with computing indicator function
 235
 236   //compute the prior probability
 237   // FLOAT code
 238   // inst->priorNonSpeechProb += PRIOR_UPDATE *
 239   //                             (indPriorNonSpeech - inst->priorNonSpeechProb);
 240   tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
 241   inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
 242                                 PRIOR_UPDATE_Q14, tmp16, 14); // Q14
 243
 244   //final speech probability: combine prior model with LR factor:
 245
       // Default every bin to 0; bins skipped below keep that value.
 246   memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
 247
 248   if (inst->priorNonSpeechProb > 0) {
 249     r0 = inst->priorNonSpeechProb;
 250     r1 = 16384 - r0;
 251     int32_t const_23637 = 23637;
 252     int32_t const_44 = 44;
 253     int32_t const_84 = 84;
 254     int32_t const_1 = 1;
 255     int32_t const_neg8 = -8;
 256     for (i = 0; i < inst->magnLen; i++) {
 257       r2 = inst->logLrtTimeAvgW32[i];
 258       if (r2 < 65300) {
             // Assembly steps for bin i:
             //  - r2 = (logLrtTimeAvgW32[i] * 23637) >> 14. Its low 12 bits
             //    (frac) feed ((frac * frac * 44) >> 19) + ((frac * 84) >> 7);
             //    its upper part (intPart, clamped to >= -8) is the exponent.
             //  - r2 = (1 << (intPart + 8)) + (polynomial shifted by
             //    intPart - 4, left or right depending on the sign).
             //  - r7 and r4 are normalization shift counts derived from
             //    clz/clo of (r1 << 16) and of r2.
             //  - If r7 + r4 - 7 < 0 the block jumps to label 1. Otherwise r2
             //    is multiplied by r1 (16384 - priorNonSpeechProb) with
             //    shifts picked from r7 + r4, and
             //    r3 = (r0 << 8) / (r0 + r2).
             // NOTE(review): on the early exit r3 still holds
             // 1 << (intPart + 8), and that is what gets stored (truncated to
             // uint16_t) below - confirm this always truncates to 0.
 259         __asm __volatile(
 260           ".set         push                                      \n\t"
 261           ".set         noreorder                                 \n\t"
 262           "mul          %[r2],    %[r2],          %[const_23637]  \n\t"
 263           "sll          %[r6],    %[r1],          16              \n\t"
 264           "clz          %[r7],    %[r6]                           \n\t"
 265           "clo          %[r8],    %[r6]                           \n\t"
 266           "slt          %[r9],    %[r6],          $0              \n\t"
 267           "movn         %[r7],    %[r8],          %[r9]           \n\t"
 268           "sra          %[r2],    %[r2],          14              \n\t"
 269           "andi         %[r3],    %[r2],          0xfff           \n\t"
 270           "mul          %[r4],    %[r3],          %[r3]           \n\t"
 271           "mul          %[r3],    %[r3],          %[const_84]     \n\t"
 272           "sra          %[r2],    %[r2],          12              \n\t"
 273           "slt          %[r5],    %[r2],          %[const_neg8]   \n\t"
 274           "movn         %[r2],    %[const_neg8],  %[r5]           \n\t"
 275           "mul          %[r4],    %[r4],          %[const_44]     \n\t"
 276           "sra          %[r3],    %[r3],          7               \n\t"
 277           "addiu        %[r7],    %[r7],          -1              \n\t"
 278           "slti         %[r9],    %[r7],          31              \n\t"
 279           "movz         %[r7],    $0,             %[r9]           \n\t"
 280           "sra          %[r4],    %[r4],          19              \n\t"
 281           "addu         %[r4],    %[r4],          %[r3]           \n\t"
 282           "addiu        %[r3],    %[r2],          8               \n\t"
 283           "addiu        %[r2],    %[r2],          -4              \n\t"
 284           "neg          %[r5],    %[r2]                           \n\t"
 285           "sllv         %[r6],    %[r4],          %[r2]           \n\t"
 286           "srav         %[r5],    %[r4],          %[r5]           \n\t"
 287           "slt          %[r2],    %[r2],          $0              \n\t"
 288           "movn         %[r6],    %[r5],          %[r2]           \n\t"
 289           "sllv         %[r3],    %[const_1],     %[r3]           \n\t"
 290           "addu         %[r2],    %[r3],          %[r6]           \n\t"
 291           "clz          %[r4],    %[r2]                           \n\t"
 292           "clo          %[r5],    %[r2]                           \n\t"
 293           "slt          %[r8],    %[r2],          $0              \n\t"
 294           "movn         %[r4],    %[r5],          %[r8]           \n\t"
 295           "addiu        %[r4],    %[r4],          -1              \n\t"
 296           "slt          %[r5],    $0,             %[r2]           \n\t"
 297           "or           %[r5],    %[r5],          %[r7]           \n\t"
 298           "movz         %[r4],    $0,             %[r5]           \n\t"
 299           "addiu        %[r6],    %[r7],          -7              \n\t"
 300           "addu         %[r6],    %[r6],          %[r4]           \n\t"
 301           "bltz         %[r6],    1f                              \n\t"
 302           " nop                                                   \n\t"
 303           "addiu        %[r4],    %[r6],          -8              \n\t"
 304           "neg          %[r3],    %[r4]                           \n\t"
 305           "srav         %[r5],    %[r2],          %[r3]           \n\t"
 306           "mul          %[r5],    %[r5],          %[r1]           \n\t"
 307           "mul          %[r2],    %[r2],          %[r1]           \n\t"
 308           "slt          %[r4],    %[r4],          $0              \n\t"
 309           "srav         %[r5],    %[r5],          %[r6]           \n\t"
 310           "sra          %[r2],    %[r2],          8               \n\t"
 311           "movn         %[r2],    %[r5],          %[r4]           \n\t"
 312           "sll          %[r3],    %[r0],          8               \n\t"
 313           "addu         %[r2],    %[r0],          %[r2]           \n\t"
 314           "divu         %[r3],    %[r3],          %[r2]           \n\t"
 315          "1:                                                      \n\t"
 316           ".set         pop                                       \n\t"
 317           : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4),
 318             [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
 319             [r8] "=&r" (r8), [r9] "=&r" (r9)
 320           : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637),
 321             [const_neg8] "r" (const_neg8), [const_84] "r" (const_84),
 322             [const_1] "r" (const_1), [const_44] "r" (const_44)
 323           : "hi", "lo"
 324         );
 325         nonSpeechProbFinal[i] = r3;
 326       }
 327     }
 328   }
 329 }
 330
 331 // Update analysis buffer for lower band, and window data before FFT.
     // MIPS version. Steps, as implemented below:
     //  1. The samples of inst->analysisBuffer starting at offset
     //     inst->blockLen10ms are copied to its start, and inst->blockLen10ms
     //     samples from new_speech are copied to its end (both through
     //     WEBRTC_SPL_MEMCPY_W16).
     //  2. Each of the inst->anaLen samples in the buffer is multiplied by the
     //     matching inst->window coefficient and the rounded, scaled product
     //     is stored to out.
     // Parameters:
     //  inst       - noise suppression instance; analysisBuffer is modified,
     //               window, anaLen and blockLen10ms are read.
     //  out        - receives inst->anaLen windowed 16-bit samples.
     //  new_speech - source of the inst->blockLen10ms new samples.
 332 void WebRtcNsx_AnalysisUpdate_mips(NsxInst_t* inst,
 333                                    int16_t* out,
 334                                    int16_t* new_speech) {
 335
 336   int iters, after;
 337   int anaLen = inst->anaLen;
       // The buffers are only handed to the assembly as addresses; the int*
       // type is never dereferenced from C.
 338   int *window = (int*)inst->window;
 339   int *anaBuf = (int*)inst->analysisBuffer;
 340   int *outBuf = (int*)out;
 341   int r0, r1, r2, r3, r4, r5, r6, r7;
 342 #if defined(MIPS_DSP_R1_LE)
 343   int r8;
 344 #endif
 345
 346   // For lower band update analysis buffer.
 347   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer,
 348                         inst->analysisBuffer + inst->blockLen10ms,
 349                         inst->anaLen - inst->blockLen10ms);
 350   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer
 351       + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms);
 352
 353   // Window data before FFT.
 354 #if defined(MIPS_DSP_R1_LE)
       // DSP path: the main loop (label 1) runs anaLen >> 3 times and handles
       // 8 samples per iteration, loading them as four 32-bit words from each
       // buffer. Products come from muleq_s.w.phl/phr and are reduced with a
       // rounding shift by 15 (precr_sra_r.ph.w on DSP R2, shra_r.w plus
       // packrl.ph otherwise). The tail loop (label 3) handles the remaining
       // anaLen & 7 samples one at a time with mul and a rounding shift by 14.
       // NOTE(review): the lw/sw accesses presumably require window,
       // analysisBuffer and out to be 4-byte aligned - confirm.
       // NOTE(review): the main loop is presumably numerically equivalent to
       // the tail loop's rounded >> 14 - confirm against the DSP ASE manual.
 355   __asm __volatile(
 356     ".set              push                                \n\t"
 357     ".set              noreorder                           \n\t"
 358     "sra               %[iters],   %[anaLen],    3         \n\t"
 359    "1:                                                     \n\t"
 360     "blez              %[iters],   2f                      \n\t"
 361     " nop                                                  \n\t"
 362     "lw                %[r0],      0(%[window])            \n\t"
 363     "lw                %[r1],      0(%[anaBuf])            \n\t"
 364     "lw                %[r2],      4(%[window])            \n\t"
 365     "lw                %[r3],      4(%[anaBuf])            \n\t"
 366     "lw                %[r4],      8(%[window])            \n\t"
 367     "lw                %[r5],      8(%[anaBuf])            \n\t"
 368     "lw                %[r6],      12(%[window])           \n\t"
 369     "lw                %[r7],      12(%[anaBuf])           \n\t"
 370     "muleq_s.w.phl     %[r8],      %[r0],        %[r1]     \n\t"
 371     "muleq_s.w.phr     %[r0],      %[r0],        %[r1]     \n\t"
 372     "muleq_s.w.phl     %[r1],      %[r2],        %[r3]     \n\t"
 373     "muleq_s.w.phr     %[r2],      %[r2],        %[r3]     \n\t"
 374     "muleq_s.w.phl     %[r3],      %[r4],        %[r5]     \n\t"
 375     "muleq_s.w.phr     %[r4],      %[r4],        %[r5]     \n\t"
 376     "muleq_s.w.phl     %[r5],      %[r6],        %[r7]     \n\t"
 377     "muleq_s.w.phr     %[r6],      %[r6],        %[r7]     \n\t"
 378 #if defined(MIPS_DSP_R2_LE)
 379     "precr_sra_r.ph.w  %[r8],      %[r0],        15        \n\t"
 380     "precr_sra_r.ph.w  %[r1],      %[r2],        15        \n\t"
 381     "precr_sra_r.ph.w  %[r3],      %[r4],        15        \n\t"
 382     "precr_sra_r.ph.w  %[r5],      %[r6],        15        \n\t"
 383     "sw                %[r8],      0(%[outBuf])            \n\t"
 384     "sw                %[r1],      4(%[outBuf])            \n\t"
 385     "sw                %[r3],      8(%[outBuf])            \n\t"
 386     "sw                %[r5],      12(%[outBuf])           \n\t"
 387 #else
 388     "shra_r.w          %[r8],      %[r8],        15        \n\t"
 389     "shra_r.w          %[r0],      %[r0],        15        \n\t"
 390     "shra_r.w          %[r1],      %[r1],        15        \n\t"
 391     "shra_r.w          %[r2],      %[r2],        15        \n\t"
 392     "shra_r.w          %[r3],      %[r3],        15        \n\t"
 393     "shra_r.w          %[r4],      %[r4],        15        \n\t"
 394     "shra_r.w          %[r5],      %[r5],        15        \n\t"
 395     "shra_r.w          %[r6],      %[r6],        15        \n\t"
 396     "sll               %[r0],      %[r0],        16        \n\t"
 397     "sll               %[r2],      %[r2],        16        \n\t"
 398     "sll               %[r4],      %[r4],        16        \n\t"
 399     "sll               %[r6],      %[r6],        16        \n\t"
 400     "packrl.ph         %[r0],      %[r8],        %[r0]     \n\t"
 401     "packrl.ph         %[r2],      %[r1],        %[r2]     \n\t"
 402     "packrl.ph         %[r4],      %[r3],        %[r4]     \n\t"
 403     "packrl.ph         %[r6],      %[r5],        %[r6]     \n\t"
 404     "sw                %[r0],      0(%[outBuf])            \n\t"
 405     "sw                %[r2],      4(%[outBuf])            \n\t"
 406     "sw                %[r4],      8(%[outBuf])            \n\t"
 407     "sw                %[r6],      12(%[outBuf])           \n\t"
 408 #endif
 409     "addiu             %[window],  %[window],    16        \n\t"
 410     "addiu             %[anaBuf],  %[anaBuf],    16        \n\t"
 411     "addiu             %[outBuf],  %[outBuf],    16        \n\t"
 412     "b                 1b                                  \n\t"
 413     " addiu            %[iters],   %[iters],     -1        \n\t"
 414    "2:                                                     \n\t"
 415     "andi              %[after],   %[anaLen],    7         \n\t"
 416    "3:                                                     \n\t"
 417     "blez              %[after],   4f                      \n\t"
 418     " nop                                                  \n\t"
 419     "lh                %[r0],      0(%[window])            \n\t"
 420     "lh                %[r1],      0(%[anaBuf])            \n\t"
 421     "mul               %[r0],      %[r0],        %[r1]     \n\t"
 422     "addiu             %[window],  %[window],    2         \n\t"
 423     "addiu             %[anaBuf],  %[anaBuf],    2         \n\t"
 424     "addiu             %[outBuf],  %[outBuf],    2         \n\t"
 425     "shra_r.w          %[r0],      %[r0],        14        \n\t"
 426     "sh                %[r0],      -2(%[outBuf])           \n\t"
 427     "b                 3b                                  \n\t"
 428     " addiu            %[after],   %[after],     -1        \n\t"
 429    "4:                                                     \n\t"
 430     ".set              pop                                 \n\t"
 431     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
 432       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
 433       [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8),
 434       [iters] "=&r" (iters), [after] "=&r" (after),
 435       [window] "+r" (window),[anaBuf] "+r" (anaBuf),
 436       [outBuf] "+r" (outBuf)
 437     : [anaLen] "r" (anaLen)
 438     : "memory", "hi", "lo"
 439   );
 440 #else
       // Scalar path: the main loop (label 1) runs anaLen >> 2 times and
       // handles 4 samples per iteration with 16-bit loads; each output is
       // (window * sample + 0x2000) >> 14. The tail loop (label 3) applies the
       // same formula to the remaining anaLen & 3 samples one at a time.
 441   __asm  __volatile(
 442     ".set           push                                    \n\t"
 443     ".set           noreorder                               \n\t"
 444     "sra            %[iters],   %[anaLen],      2           \n\t"
 445    "1:                                                      \n\t"
 446     "blez           %[iters],   2f                          \n\t"
 447     " nop                                                   \n\t"
 448     "lh             %[r0],      0(%[window])                \n\t"
 449     "lh             %[r1],      0(%[anaBuf])                \n\t"
 450     "lh             %[r2],      2(%[window])                \n\t"
 451     "lh             %[r3],      2(%[anaBuf])                \n\t"
 452     "lh             %[r4],      4(%[window])                \n\t"
 453     "lh             %[r5],      4(%[anaBuf])                \n\t"
 454     "lh             %[r6],      6(%[window])                \n\t"
 455     "lh             %[r7],      6(%[anaBuf])                \n\t"
 456     "mul            %[r0],      %[r0],          %[r1]       \n\t"
 457     "mul            %[r2],      %[r2],          %[r3]       \n\t"
 458     "mul            %[r4],      %[r4],          %[r5]       \n\t"
 459     "mul            %[r6],      %[r6],          %[r7]       \n\t"
 460     "addiu          %[window],  %[window],      8           \n\t"
 461     "addiu          %[anaBuf],  %[anaBuf],      8           \n\t"
 462     "addiu          %[r0],      %[r0],          0x2000      \n\t"
 463     "addiu          %[r2],      %[r2],          0x2000      \n\t"
 464     "addiu          %[r4],      %[r4],          0x2000      \n\t"
 465     "addiu          %[r6],      %[r6],          0x2000      \n\t"
 466     "sra            %[r0],      %[r0],          14          \n\t"
 467     "sra            %[r2],      %[r2],          14          \n\t"
 468     "sra            %[r4],      %[r4],          14          \n\t"
 469     "sra            %[r6],      %[r6],          14          \n\t"
 470     "sh             %[r0],      0(%[outBuf])                \n\t"
 471     "sh             %[r2],      2(%[outBuf])                \n\t"
 472     "sh             %[r4],      4(%[outBuf])                \n\t"
 473     "sh             %[r6],      6(%[outBuf])                \n\t"
 474     "addiu          %[outBuf],  %[outBuf],      8           \n\t"
 475     "b              1b                                      \n\t"
 476     " addiu         %[iters],   %[iters],       -1          \n\t"
 477    "2:                                                      \n\t"
 478     "andi           %[after],   %[anaLen],      3           \n\t"
 479    "3:                                                      \n\t"
 480     "blez           %[after],   4f                          \n\t"
 481     " nop                                                   \n\t"
 482     "lh             %[r0],      0(%[window])                \n\t"
 483     "lh             %[r1],      0(%[anaBuf])                \n\t"
 484     "mul            %[r0],      %[r0],          %[r1]       \n\t"
 485     "addiu          %[window],  %[window],      2           \n\t"
 486     "addiu          %[anaBuf],  %[anaBuf],      2           \n\t"
 487     "addiu          %[outBuf],  %[outBuf],      2           \n\t"
 488     "addiu          %[r0],      %[r0],          0x2000      \n\t"
 489     "sra            %[r0],      %[r0],          14          \n\t"
 490     "sh             %[r0],      -2(%[outBuf])               \n\t"
 491     "b              3b                                      \n\t"
 492     " addiu         %[after],   %[after],       -1          \n\t"
 493    "4:                                                      \n\t"
 494     ".set           pop                                     \n\t"
 495     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
 496       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
 497       [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters),
 498       [after] "=&r" (after), [window] "+r" (window),
 499       [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf)
 500     : [anaLen] "r" (anaLen)
 501     : "memory", "hi", "lo"
 502   );
 503 #endif
 504 }
 505
 506 // For the noise suppression process, synthesis, read out fully processed
 507 // segment, and update synthesis buffer.
 508 void WebRtcNsx_SynthesisUpdate_mips(NsxInst_t* inst,
 509                                     int16_t* out_frame,
 510                                     int16_t gain_factor) {
 511
 512   int iters = inst->blockLen10ms >> 2;
 513   int after = inst->blockLen10ms & 3;
 514   int r0, r1, r2, r3, r4, r5, r6, r7;
 515   int16_t *window = (int16_t*)inst->window;
 516   int16_t *real = inst->real;
 517   int16_t *synthBuf = inst->synthesisBuffer;
 518   int16_t *out = out_frame;
 519   int sat_pos = 0x7fff;
 520   int sat_neg = 0xffff8000;
 521   int block10 = (int)inst->blockLen10ms;
 522   int anaLen = (int)inst->anaLen;
 523
 524   __asm __volatile(
 525     ".set       push                                        \n\t"
 526     ".set       noreorder                                   \n\t"
 527    "1:                                                      \n\t"
 528     "blez       %[iters],   2f                              \n\t"
 529     " nop                                                   \n\t"
 530     "lh         %[r0],      0(%[window])                    \n\t"
 531     "lh         %[r1],      0(%[real])                      \n\t"
 532     "lh         %[r2],      2(%[window])                    \n\t"
 533     "lh         %[r3],      2(%[real])                      \n\t"
 534     "lh         %[r4],      4(%[window])                    \n\t"
 535     "lh         %[r5],      4(%[real])                      \n\t"
 536     "lh         %[r6],      6(%[window])                    \n\t"
 537     "lh         %[r7],      6(%[real])                      \n\t"
 538     "mul        %[r0],      %[r0],          %[r1]           \n\t"
 539     "mul        %[r2],      %[r2],          %[r3]           \n\t"
 540     "mul        %[r4],      %[r4],          %[r5]           \n\t"
 541     "mul        %[r6],      %[r6],          %[r7]           \n\t"
 542     "addiu      %[r0],      %[r0],          0x2000          \n\t"
 543     "addiu      %[r2],      %[r2],          0x2000          \n\t"
 544     "addiu      %[r4],      %[r4],          0x2000          \n\t"
 545     "addiu      %[r6],      %[r6],          0x2000          \n\t"
 546     "sra        %[r0],      %[r0],          14              \n\t"
 547     "sra        %[r2],      %[r2],          14              \n\t"
 548     "sra        %[r4],      %[r4],          14              \n\t"
 549     "sra        %[r6],      %[r6],          14              \n\t"
 550     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
 551     "mul        %[r2],      %[r2],          %[gain_factor]  \n\t"
 552     "mul        %[r4],      %[r4],          %[gain_factor]  \n\t"
 553     "mul        %[r6],      %[r6],          %[gain_factor]  \n\t"
 554     "addiu      %[r0],      %[r0],          0x1000          \n\t"
 555     "addiu      %[r2],      %[r2],          0x1000          \n\t"
 556     "addiu      %[r4],      %[r4],          0x1000          \n\t"
 557     "addiu      %[r6],      %[r6],          0x1000          \n\t"
 558     "sra        %[r0],      %[r0],          13              \n\t"
 559     "sra        %[r2],      %[r2],          13              \n\t"
 560     "sra        %[r4],      %[r4],          13              \n\t"
 561     "sra        %[r6],      %[r6],          13              \n\t"
 562     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 563     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
 564     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
 565     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
 566     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 567     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
 568     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
 569     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
 570     "lh         %[r1],      0(%[synthBuf])                  \n\t"
 571     "lh         %[r3],      2(%[synthBuf])                  \n\t"
 572     "lh         %[r5],      4(%[synthBuf])                  \n\t"
 573     "lh         %[r7],      6(%[synthBuf])                  \n\t"
 574     "addu       %[r0],      %[r0],          %[r1]           \n\t"
 575     "addu       %[r2],      %[r2],          %[r3]           \n\t"
 576     "addu       %[r4],      %[r4],          %[r5]           \n\t"
 577     "addu       %[r6],      %[r6],          %[r7]           \n\t"
 578     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 579     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
 580     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
 581     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
 582     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 583     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
 584     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
 585     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
 586     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
 587     "slt        %[r3],      %[r2],          %[sat_neg]      \n\t"
 588     "slt        %[r5],      %[r4],          %[sat_neg]      \n\t"
 589     "slt        %[r7],      %[r6],          %[sat_neg]      \n\t"
 590     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
 591     "movn       %[r2],      %[sat_neg],     %[r3]           \n\t"
 592     "movn       %[r4],      %[sat_neg],     %[r5]           \n\t"
 593     "movn       %[r6],      %[sat_neg],     %[r7]           \n\t"
 594     "sh         %[r0],      0(%[synthBuf])                  \n\t"
 595     "sh         %[r2],      2(%[synthBuf])                  \n\t"
 596     "sh         %[r4],      4(%[synthBuf])                  \n\t"
 597     "sh         %[r6],      6(%[synthBuf])                  \n\t"
 598     "sh         %[r0],      0(%[out])                       \n\t"
 599     "sh         %[r2],      2(%[out])                       \n\t"
 600     "sh         %[r4],      4(%[out])                       \n\t"
 601     "sh         %[r6],      6(%[out])                       \n\t"
 602     "addiu      %[window],  %[window],      8               \n\t"
 603     "addiu      %[real],    %[real],        8               \n\t"
 604     "addiu      %[synthBuf],%[synthBuf],    8               \n\t"
 605     "addiu      %[out],     %[out],         8               \n\t"
 606     "b          1b                                          \n\t"
 607     " addiu     %[iters],   %[iters],       -1              \n\t"
 608    "2:                                                      \n\t"
 609     "blez       %[after],   3f                              \n\t"
 610     " subu      %[block10], %[anaLen],      %[block10]      \n\t"
 611     "lh         %[r0],      0(%[window])                    \n\t"
 612     "lh         %[r1],      0(%[real])                      \n\t"
 613     "mul        %[r0],      %[r0],          %[r1]           \n\t"
 614     "addiu      %[window],  %[window],      2               \n\t"
 615     "addiu      %[real],    %[real],        2               \n\t"
 616     "addiu      %[r0],      %[r0],          0x2000          \n\t"
 617     "sra        %[r0],      %[r0],          14              \n\t"
 618     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
 619     "addiu      %[r0],      %[r0],          0x1000          \n\t"
 620     "sra        %[r0],      %[r0],          13              \n\t"
 621     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 622     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 623     "lh         %[r1],      0(%[synthBuf])                  \n\t"
 624     "addu       %[r0],      %[r0],          %[r1]           \n\t"
 625     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 626     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 627     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
 628     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
 629     "sh         %[r0],      0(%[synthBuf])                  \n\t"
 630     "sh         %[r0],      0(%[out])                       \n\t"
 631     "addiu      %[synthBuf],%[synthBuf],    2               \n\t"
 632     "addiu      %[out],     %[out],         2               \n\t"
 633     "b          2b                                          \n\t"
 634     " addiu     %[after],   %[after],       -1              \n\t"
 635    "3:                                                      \n\t"
 636     "sra        %[iters],   %[block10],     2               \n\t"
 637    "4:                                                      \n\t"
 638     "blez       %[iters],   5f                              \n\t"
 639     " andi      %[after],   %[block10],     3               \n\t"
 640     "lh         %[r0],      0(%[window])                    \n\t"
 641     "lh         %[r1],      0(%[real])                      \n\t"
 642     "lh         %[r2],      2(%[window])                    \n\t"
 643     "lh         %[r3],      2(%[real])                      \n\t"
 644     "lh         %[r4],      4(%[window])                    \n\t"
 645     "lh         %[r5],      4(%[real])                      \n\t"
 646     "lh         %[r6],      6(%[window])                    \n\t"
 647     "lh         %[r7],      6(%[real])                      \n\t"
 648     "mul        %[r0],      %[r0],          %[r1]           \n\t"
 649     "mul        %[r2],      %[r2],          %[r3]           \n\t"
 650     "mul        %[r4],      %[r4],          %[r5]           \n\t"
 651     "mul        %[r6],      %[r6],          %[r7]           \n\t"
 652     "addiu      %[r0],      %[r0],          0x2000          \n\t"
 653     "addiu      %[r2],      %[r2],          0x2000          \n\t"
 654     "addiu      %[r4],      %[r4],          0x2000          \n\t"
 655     "addiu      %[r6],      %[r6],          0x2000          \n\t"
 656     "sra        %[r0],      %[r0],          14              \n\t"
 657     "sra        %[r2],      %[r2],          14              \n\t"
 658     "sra        %[r4],      %[r4],          14              \n\t"
 659     "sra        %[r6],      %[r6],          14              \n\t"
 660     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
 661     "mul        %[r2],      %[r2],          %[gain_factor]  \n\t"
 662     "mul        %[r4],      %[r4],          %[gain_factor]  \n\t"
 663     "mul        %[r6],      %[r6],          %[gain_factor]  \n\t"
 664     "addiu      %[r0],      %[r0],          0x1000          \n\t"
 665     "addiu      %[r2],      %[r2],          0x1000          \n\t"
 666     "addiu      %[r4],      %[r4],          0x1000          \n\t"
 667     "addiu      %[r6],      %[r6],          0x1000          \n\t"
 668     "sra        %[r0],      %[r0],          13              \n\t"
 669     "sra        %[r2],      %[r2],          13              \n\t"
 670     "sra        %[r4],      %[r4],          13              \n\t"
 671     "sra        %[r6],      %[r6],          13              \n\t"
 672     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 673     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
 674     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
 675     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
 676     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 677     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
 678     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
 679     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
 680     "lh         %[r1],      0(%[synthBuf])                  \n\t"
 681     "lh         %[r3],      2(%[synthBuf])                  \n\t"
 682     "lh         %[r5],      4(%[synthBuf])                  \n\t"
 683     "lh         %[r7],      6(%[synthBuf])                  \n\t"
 684     "addu       %[r0],      %[r0],          %[r1]           \n\t"
 685     "addu       %[r2],      %[r2],          %[r3]           \n\t"
 686     "addu       %[r4],      %[r4],          %[r5]           \n\t"
 687     "addu       %[r6],      %[r6],          %[r7]           \n\t"
 688     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 689     "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
 690     "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
 691     "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
 692     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 693     "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
 694     "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
 695     "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
 696     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
 697     "slt        %[r3],      %[r2],          %[sat_neg]      \n\t"
 698     "slt        %[r5],      %[r4],          %[sat_neg]      \n\t"
 699     "slt        %[r7],      %[r6],          %[sat_neg]      \n\t"
 700     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
 701     "movn       %[r2],      %[sat_neg],     %[r3]           \n\t"
 702     "movn       %[r4],      %[sat_neg],     %[r5]           \n\t"
 703     "movn       %[r6],      %[sat_neg],     %[r7]           \n\t"
 704     "sh         %[r0],      0(%[synthBuf])                  \n\t"
 705     "sh         %[r2],      2(%[synthBuf])                  \n\t"
 706     "sh         %[r4],      4(%[synthBuf])                  \n\t"
 707     "sh         %[r6],      6(%[synthBuf])                  \n\t"
 708     "addiu      %[window],  %[window],      8               \n\t"
 709     "addiu      %[real],    %[real],        8               \n\t"
 710     "addiu      %[synthBuf],%[synthBuf],    8               \n\t"
 711     "b          4b                                          \n\t"
 712     " addiu     %[iters],   %[iters],       -1              \n\t"
 713    "5:                                                      \n\t"
 714     "blez       %[after],   6f                              \n\t"
 715     " nop                                                   \n\t"
 716     "lh         %[r0],      0(%[window])                    \n\t"
 717     "lh         %[r1],      0(%[real])                      \n\t"
 718     "mul        %[r0],      %[r0],          %[r1]           \n\t"
 719     "addiu      %[window],  %[window],      2               \n\t"
 720     "addiu      %[real],    %[real],        2               \n\t"
 721     "addiu      %[r0],      %[r0],          0x2000          \n\t"
 722     "sra        %[r0],      %[r0],          14              \n\t"
 723     "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
 724     "addiu      %[r0],      %[r0],          0x1000          \n\t"
 725     "sra        %[r0],      %[r0],          13              \n\t"
 726     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 727     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 728     "lh         %[r1],      0(%[synthBuf])                  \n\t"
 729     "addu       %[r0],      %[r0],          %[r1]           \n\t"
 730     "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
 731     "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
 732     "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
 733     "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
 734     "sh         %[r0],      0(%[synthBuf])                  \n\t"
 735     "addiu      %[synthBuf],%[synthBuf],    2               \n\t"
 736     "b          2b                                          \n\t"
 737     " addiu     %[after],   %[after],       -1              \n\t"
 738    "6:                                                      \n\t"
 739     ".set       pop                                         \n\t"
 740     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
 741       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
 742       [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters),
 743       [after] "+r" (after), [block10] "+r" (block10),
 744       [window] "+r" (window), [real] "+r" (real),
 745       [synthBuf] "+r" (synthBuf), [out] "+r" (out)
 746     : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos),
 747       [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen)
 748     : "memory", "hi", "lo"
 749   );
 750
 751   // update synthesis buffer
 752   WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer,
 753                         inst->synthesisBuffer + inst->blockLen10ms,
 754                         inst->anaLen - inst->blockLen10ms);
 755   WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
 756       + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
 757 }
 758
 759 // Filter the data in the frequency domain, and create spectrum.
     //
     // Every bin of inst->real / inst->imag that is processed is multiplied by
     // the matching inst->noiseSupFilter entry and arithmetically shifted right
     // by 14. The result is written back in place and also copied into
     // |freq_buf|:
     //   - forward part: interleaved (real, -imag) pairs starting at
     //     freq_buf[0], one pair per bin 0 .. anaLen2;
     //   - mirrored part: (real, imag) pairs (imaginary part NOT negated) for
     //     bins 1 .. anaLen2 - 1, walking downwards from
     //     freq_buf[2 * anaLen - 2].
     //
     // inst:     supplies real, imag, noiseSupFilter, anaLen and anaLen2.
     // freq_buf: output buffer; the caller is presumably responsible for
     //           making it large enough for both parts (TODO confirm).
     //
     // NOTE(review): the main loop is tested at the bottom and handles two bins
     // per pass, so this code assumes anaLen2 is even and at least 4 -- confirm
     // against the callers.
 760 void WebRtcNsx_PrepareSpectrum_mips(NsxInst_t* inst, int16_t* freq_buf) {
 761
 762   uint16_t *noiseSupFilter = inst->noiseSupFilter;
 763   int16_t *real = inst->real;
 764   int16_t *imag = inst->imag;
     // Bin counter compared against anaLen2 at the bottom of the main loop.
 765   int32_t loop_count = 2;
 766   int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6;
     // Element offset of the first mirrored store block: 2 * anaLen - 4.
 767   int16_t tmp16 = (inst->anaLen << 1) - 4;
     // freq_buf_f walks forwards, freq_buf_s walks backwards.
 768   int16_t* freq_buf_f = freq_buf;
 769   int16_t* freq_buf_s = &freq_buf[tmp16];
 770
 771   __asm __volatile (
 772     ".set       push                                                 \n\t"
     // noreorder: the instruction written after a branch is its delay slot.
 773     ".set       noreorder                                            \n\t"
 774     //first sample
     // Bin 0: scale, store back, emit (real, -imag) to the forward part only.
     // The filter gain is fetched with lh, i.e. sign-extended, although the
     // array is declared uint16_t.
 775     "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
 776     "lh         %[tmp_2],           0(%[real])                       \n\t"
 777     "lh         %[tmp_3],           0(%[imag])                       \n\t"
 778     "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
 779     "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
 780     "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
 781     "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
 782     "sh         %[tmp_2],           0(%[real])                       \n\t"
 783     "sh         %[tmp_3],           0(%[imag])                       \n\t"
 784     "negu       %[tmp_3],           %[tmp_3]                         \n\t"
 785     "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
 786     "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
 787     "addiu      %[real],            %[real],              2          \n\t"
 788     "addiu      %[imag],            %[imag],              2          \n\t"
 789     "addiu      %[noiseSupFilter],  %[noiseSupFilter],    2          \n\t"
 790     "addiu      %[freq_buf_f],      %[freq_buf_f],        4          \n\t"
     // Main loop: two bins per pass (tmp_1..3 = first bin, tmp_4..6 = second).
 791    "1:                                                               \n\t"
 792     "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
 793     "lh         %[tmp_2],           0(%[real])                       \n\t"
 794     "lh         %[tmp_3],           0(%[imag])                       \n\t"
 795     "lh         %[tmp_4],           2(%[noiseSupFilter])             \n\t"
 796     "lh         %[tmp_5],           2(%[real])                       \n\t"
 797     "lh         %[tmp_6],           2(%[imag])                       \n\t"
 798     "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
 799     "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
 800     "mul        %[tmp_5],           %[tmp_5],             %[tmp_4]   \n\t"
 801     "mul        %[tmp_6],           %[tmp_6],             %[tmp_4]   \n\t"
 802     "addiu      %[loop_count],      %[loop_count],        2          \n\t"
 803     "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
 804     "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
 805     "sra        %[tmp_5],           %[tmp_5],             14         \n\t"
 806     "sra        %[tmp_6],           %[tmp_6],             14         \n\t"
 807     "addiu      %[noiseSupFilter],  %[noiseSupFilter],    4          \n\t"
     // Write back in place and to the mirrored part before negating, so the
     // mirrored imaginary values keep their sign. The first bin of the pair
     // lands at the higher mirrored address.
 808     "sh         %[tmp_2],           0(%[real])                       \n\t"
 809     "sh         %[tmp_2],           4(%[freq_buf_s])                 \n\t"
 810     "sh         %[tmp_3],           0(%[imag])                       \n\t"
 811     "sh         %[tmp_3],           6(%[freq_buf_s])                 \n\t"
 812     "negu       %[tmp_3],           %[tmp_3]                         \n\t"
 813     "sh         %[tmp_5],           2(%[real])                       \n\t"
 814     "sh         %[tmp_5],           0(%[freq_buf_s])                 \n\t"
 815     "sh         %[tmp_6],           2(%[imag])                       \n\t"
 816     "sh         %[tmp_6],           2(%[freq_buf_s])                 \n\t"
 817     "negu       %[tmp_6],           %[tmp_6]                         \n\t"
 818     "addiu      %[freq_buf_s],      %[freq_buf_s],        -8         \n\t"
 819     "addiu      %[real],            %[real],              4          \n\t"
 820     "addiu      %[imag],            %[imag],              4          \n\t"
     // Forward part receives the negated imaginary values.
 821     "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
 822     "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
 823     "sh         %[tmp_5],           4(%[freq_buf_f])                 \n\t"
 824     "sh         %[tmp_6],           6(%[freq_buf_f])                 \n\t"
     // Repeat while loop_count < anaLen2; the pointer bump below is the delay
     // slot and therefore runs on every pass, including the last one.
 825     "blt        %[loop_count],      %[loop_size],         1b         \n\t"
 826     " addiu     %[freq_buf_f],      %[freq_buf_f],        8          \n\t"
 827     //last two samples:
     // The first of the two is still mirrored; the second is written to the
     // forward part only.
     // NOTE(review): unlike the main loop, tmp_6 is stored to 6(freq_buf_f)
     // without a preceding negu, so the final imaginary value is not negated
     // in freq_buf -- confirm this is intended.
 828     "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
 829     "lh         %[tmp_2],           0(%[real])                       \n\t"
 830     "lh         %[tmp_3],           0(%[imag])                       \n\t"
 831     "lh         %[tmp_4],           2(%[noiseSupFilter])             \n\t"
 832     "lh         %[tmp_5],           2(%[real])                       \n\t"
 833     "lh         %[tmp_6],           2(%[imag])                       \n\t"
 834     "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
 835     "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
 836     "mul        %[tmp_5],           %[tmp_5],             %[tmp_4]   \n\t"
 837     "mul        %[tmp_6],           %[tmp_6],             %[tmp_4]   \n\t"
 838     "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
 839     "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
 840     "sra        %[tmp_5],           %[tmp_5],             14         \n\t"
 841     "sra        %[tmp_6],           %[tmp_6],             14         \n\t"
 842     "sh         %[tmp_2],           0(%[real])                       \n\t"
 843     "sh         %[tmp_2],           4(%[freq_buf_s])                 \n\t"
 844     "sh         %[tmp_3],           0(%[imag])                       \n\t"
 845     "sh         %[tmp_3],           6(%[freq_buf_s])                 \n\t"
 846     "negu       %[tmp_3],           %[tmp_3]                         \n\t"
 847     "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
 848     "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
 849     "sh         %[tmp_5],           4(%[freq_buf_f])                 \n\t"
 850     "sh         %[tmp_6],           6(%[freq_buf_f])                 \n\t"
 851     "sh         %[tmp_5],           2(%[real])                       \n\t"
 852     "sh         %[tmp_6],           2(%[imag])                       \n\t"
 853     ".set       pop                                                  \n\t"
     // Every register the code advances is a read-write ("+r") operand; the
     // scratch registers are early-clobber outputs.
 854     : [real] "+r" (real), [imag] "+r" (imag),
 855       [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s),
 856       [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter),
 857       [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3),
 858       [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6)
 859     : [loop_size] "r" (inst->anaLen2)
     // "memory": the asm stores through the pointers; hi/lo are declared
     // clobbered for the mul instructions.
 860     : "memory", "hi", "lo"
 861   );
 862 }
 863
 864 #if defined(MIPS_DSP_R1_LE)
 865 // Denormalize the real-valued signal |in|, the output from inverse FFT.
     //
     // Shifts the first inst->anaLen samples of |in| by
     // (factor - inst->normData) bits and stores them to inst->real:
     //   - shift >= 0: left shift using the DSP ASE instruction shllv_s.ph
     //     (saturating halfword shift); the low halfword is stored;
     //   - shift <  0: arithmetic right shift (srav) by -shift.
     // Samples are handled four at a time, followed by a one-at-a-time
     // remainder loop for anaLen % 4. Nothing is written when anaLen is 0.
     //
     // inst:   supplies anaLen and normData; inst->real receives the output.
     // in:     input samples, at least anaLen elements are read.
     // factor: shift applied relative to inst->normData.
     //
     // Only built when MIPS_DSP_R1_LE is defined because of shllv_s.ph.
 866 void WebRtcNsx_Denormalize_mips(NsxInst_t* inst, int16_t* in, int factor) {
 867   int32_t r0, r1, r2, r3, t0;
 868   int len = inst->anaLen;
 869   int16_t *out = &inst->real[0];
 870   int shift = factor - inst->normData;
 871
 872   __asm __volatile (
 873     ".set          push                                \n\t"
     // noreorder: the instruction written after a branch is its delay slot.
 874     ".set          noreorder                           \n\t"
 875     "beqz          %[len],     8f                      \n\t"
 876     " nop                                              \n\t"
     // t0 = number of 4-sample groups (computed in the delay slot, so it is
     // valid on both the left-shift and the right-shift path).
 877     "bltz          %[shift],   4f                      \n\t"
 878     " sra          %[t0],      %[len],      2          \n\t"
     // len becomes the remainder count (len & 3).
 879     "beqz          %[t0],      2f                      \n\t"
 880     " andi         %[len],     %[len],      3          \n\t"
     // Left shift, four samples per pass.
 881    "1:                                                 \n\t"
 882     "lh            %[r0],      0(%[in])                \n\t"
 883     "lh            %[r1],      2(%[in])                \n\t"
 884     "lh            %[r2],      4(%[in])                \n\t"
 885     "lh            %[r3],      6(%[in])                \n\t"
 886     "shllv_s.ph    %[r0],      %[r0],       %[shift]   \n\t"
 887     "shllv_s.ph    %[r1],      %[r1],       %[shift]   \n\t"
 888     "shllv_s.ph    %[r2],      %[r2],       %[shift]   \n\t"
 889     "shllv_s.ph    %[r3],      %[r3],       %[shift]   \n\t"
 890     "addiu         %[in],      %[in],       8          \n\t"
 891     "addiu         %[t0],      %[t0],       -1         \n\t"
 892     "sh            %[r0],      0(%[out])               \n\t"
 893     "sh            %[r1],      2(%[out])               \n\t"
 894     "sh            %[r2],      4(%[out])               \n\t"
 895     "sh            %[r3],      6(%[out])               \n\t"
 896     "bgtz          %[t0],      1b                      \n\t"
 897     " addiu        %[out],     %[out],      8          \n\t"
 898    "2:                                                 \n\t"
 899     "beqz          %[len],     8f                      \n\t"
 900     " nop                                              \n\t"
     // Left shift, remainder; the store sits in the delay slot, hence the -2
     // offset after |out| has already been advanced.
 901    "3:                                                 \n\t"
 902     "lh            %[r0],      0(%[in])                \n\t"
 903     "addiu         %[in],      %[in],       2          \n\t"
 904     "addiu         %[len],     %[len],      -1         \n\t"
 905     "shllv_s.ph    %[r0],      %[r0],       %[shift]   \n\t"
 906     "addiu         %[out],     %[out],      2          \n\t"
 907     "bgtz          %[len],     3b                      \n\t"
 908     " sh           %[r0],      -2(%[out])              \n\t"
     // The negu at label 4 follows this branch and so executes as its delay
     // slot; the negated shift is not used again on this path.
 909     "b             8f                                  \n\t"
     // Right-shift path: work with the positive shift amount.
 910    "4:                                                 \n\t"
 911     "negu          %[shift],   %[shift]                \n\t"
 912     "beqz          %[t0],      6f                      \n\t"
 913     " andi         %[len],     %[len],      3          \n\t"
     // Right shift, four samples per pass.
 914    "5:                                                 \n\t"
 915     "lh            %[r0],      0(%[in])                \n\t"
 916     "lh            %[r1],      2(%[in])                \n\t"
 917     "lh            %[r2],      4(%[in])                \n\t"
 918     "lh            %[r3],      6(%[in])                \n\t"
 919     "srav          %[r0],      %[r0],       %[shift]   \n\t"
 920     "srav          %[r1],      %[r1],       %[shift]   \n\t"
 921     "srav          %[r2],      %[r2],       %[shift]   \n\t"
 922     "srav          %[r3],      %[r3],       %[shift]   \n\t"
 923     "addiu         %[in],      %[in],       8          \n\t"
 924     "addiu         %[t0],      %[t0],       -1         \n\t"
 925     "sh            %[r0],      0(%[out])               \n\t"
 926     "sh            %[r1],      2(%[out])               \n\t"
 927     "sh            %[r2],      4(%[out])               \n\t"
 928     "sh            %[r3],      6(%[out])               \n\t"
 929     "bgtz          %[t0],      5b                      \n\t"
 930     " addiu        %[out],     %[out],      8          \n\t"
 931    "6:                                                 \n\t"
 932     "beqz          %[len],     8f                      \n\t"
 933     " nop                                              \n\t"
     // Right shift, remainder.
 934    "7:                                                 \n\t"
 935     "lh            %[r0],      0(%[in])                \n\t"
 936     "addiu         %[in],      %[in],       2          \n\t"
 937     "addiu         %[len],     %[len],      -1         \n\t"
 938     "srav          %[r0],      %[r0],       %[shift]   \n\t"
 939     "addiu         %[out],     %[out],      2          \n\t"
 940     "bgtz          %[len],     7b                      \n\t"
 941     " sh           %[r0],      -2(%[out])              \n\t"
 942    "8:                                                 \n\t"
 943     ".set          pop                                 \n\t"
     // len, shift, in and out are all overwritten above, so they must be
     // read-write ("+r") operands: the compiler assumes input-only operands
     // still hold their original values when the asm statement ends.
 944     : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
 945       [r2] "=&r" (r2), [r3] "=&r" (r3), [len] "+r" (len),
 946       [shift] "+r" (shift), [in] "+r" (in), [out] "+r" (out)
 947     :
 948     : "memory"
 949   );
 950 }
 951 #endif
 952
 953 // Normalize the real-valued signal |in|, the input to forward FFT.
     //
     // Stores in[i] << inst->normData to out[i] for the first inst->anaLen
     // samples. The shift is a plain logical left shift (sllv) and only the
     // low halfword of the result is stored, so there is no saturation.
     // Samples are handled four at a time, followed by a one-at-a-time
     // remainder loop for anaLen % 4. Nothing is written when anaLen is 0.
     //
     // inst: supplies anaLen (sample count) and normData (shift amount).
     // in:   input samples, at least anaLen elements are read.
     // out:  output buffer, at least anaLen elements are written.
 954 void WebRtcNsx_NormalizeRealBuffer_mips(NsxInst_t* inst,
 955                                         const int16_t* in,
 956                                         int16_t* out) {
 957   int32_t r0, r1, r2, r3, t0;
 958   int len = inst->anaLen;
 959   int shift = inst->normData;
 960
 961   __asm __volatile (
 962     ".set          push                                \n\t"
     // noreorder: the instruction written after a branch is its delay slot.
 963     ".set          noreorder                           \n\t"
     // t0 = number of 4-sample groups, len = remainder (len & 3); both are
     // computed in branch delay slots.
 964     "beqz          %[len],     4f                      \n\t"
 965     " sra          %[t0],      %[len],      2          \n\t"
 966     "beqz          %[t0],      2f                      \n\t"
 967     " andi         %[len],     %[len],      3          \n\t"
     // Four samples per pass.
 968    "1:                                                 \n\t"
 969     "lh            %[r0],      0(%[in])                \n\t"
 970     "lh            %[r1],      2(%[in])                \n\t"
 971     "lh            %[r2],      4(%[in])                \n\t"
 972     "lh            %[r3],      6(%[in])                \n\t"
 973     "sllv          %[r0],      %[r0],       %[shift]   \n\t"
 974     "sllv          %[r1],      %[r1],       %[shift]   \n\t"
 975     "sllv          %[r2],      %[r2],       %[shift]   \n\t"
 976     "sllv          %[r3],      %[r3],       %[shift]   \n\t"
 977     "addiu         %[in],      %[in],       8          \n\t"
 978     "addiu         %[t0],      %[t0],       -1         \n\t"
 979     "sh            %[r0],      0(%[out])               \n\t"
 980     "sh            %[r1],      2(%[out])               \n\t"
 981     "sh            %[r2],      4(%[out])               \n\t"
 982     "sh            %[r3],      6(%[out])               \n\t"
 983     "bgtz          %[t0],      1b                      \n\t"
 984     " addiu        %[out],     %[out],      8          \n\t"
 985    "2:                                                 \n\t"
 986     "beqz          %[len],     4f                      \n\t"
 987     " nop                                              \n\t"
     // Remainder; the store sits in the delay slot, hence the -2 offset after
     // |out| has already been advanced.
 988    "3:                                                 \n\t"
 989     "lh            %[r0],      0(%[in])                \n\t"
 990     "addiu         %[in],      %[in],       2          \n\t"
 991     "addiu         %[len],     %[len],      -1         \n\t"
 992     "sllv          %[r0],      %[r0],       %[shift]   \n\t"
 993     "addiu         %[out],     %[out],      2          \n\t"
 994     "bgtz          %[len],     3b                      \n\t"
 995     " sh           %[r0],      -2(%[out])              \n\t"
 996    "4:                                                 \n\t"
 997     ".set          pop                                 \n\t"
     // len, in and out are overwritten above, so they must be read-write
     // ("+r") operands: the compiler assumes input-only operands still hold
     // their original values when the asm statement ends. shift is only read.
 998     : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
 999       [r2] "=&r" (r2), [r3] "=&r" (r3), [len] "+r" (len),
1000       [in] "+r" (in), [out] "+r" (out)
1001     : [shift] "r" (shift)
1002     : "memory"
1003   );
1004 }
1005