src/third_party/webrtc/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model_mips.c

/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
  10
  11 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h"
  12
  13 // MIPS DSPR2 optimization for function WebRtcIsacfix_CalculateResidualEnergy
  14 // Bit-exact with WebRtcIsacfix_CalculateResidualEnergyC from file
  15 // lpc_masking_model.c
  16 int32_t WebRtcIsacfix_CalculateResidualEnergyMIPS(int lpc_order,
  17                                                   int32_t q_val_corr,
  18                                                   int q_val_polynomial,
  19                                                   int16_t* a_polynomial,
  20                                                   int32_t* corr_coeffs,
  21                                                   int* q_val_residual_energy) {
  22
  23   int i = 0, j = 0;
  24   int shift_internal = 0, shift_norm = 0;
  25   int32_t tmp32 = 0, word32_high = 0, word32_low = 0, residual_energy = 0;
  26   int32_t tmp_corr_c = corr_coeffs[0];
  27   int16_t* tmp_a_poly = &a_polynomial[0];
  28   int32_t sum64_hi = 0;
  29   int32_t sum64_lo = 0;
  30
  31   for (j = 0; j <= lpc_order; j++) {
  32     // For the case of i == 0:
  33     //   residual_energy +=
  34     //     a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i];
  35
  36     int32_t tmp2, tmp3;
  37     int16_t sign_1;
  38     int16_t sign_2;
  39     int16_t sign_3;
  40
  41     __asm __volatile (
  42       ".set      push                                                \n\t"
  43       ".set      noreorder                                           \n\t"
  44       "lh        %[tmp2],         0(%[tmp_a_poly])                   \n\t"
  45       "mul       %[tmp32],        %[tmp2],            %[tmp2]        \n\t"
  46       "addiu     %[tmp_a_poly],   %[tmp_a_poly],      2              \n\t"
  47       "sra       %[sign_2],       %[sum64_hi],        31             \n\t"
  48       "mult      $ac0,            %[tmp32],           %[tmp_corr_c]  \n\t"
  49       "shilov    $ac0,            %[shift_internal]                  \n\t"
  50       "mfhi      %[tmp2],         $ac0                               \n\t"
  51       "mflo      %[tmp3],         $ac0                               \n\t"
  52       "sra       %[sign_1],       %[tmp2],            31             \n\t"
  53       "xor       %[sign_3],       %[sign_1],          %[sign_2]      \n\t"
  54       ".set      pop                                                 \n\t"
  55       : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
  56         [tmp_a_poly] "+r" (tmp_a_poly), [sign_1] "=&r" (sign_1),
  57         [sign_3] "=&r" (sign_3), [sign_2] "=&r" (sign_2),
  58         [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
  59       : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
  60       : "hi", "lo", "memory"
  61     );
  62
  63     if (sign_3 != 0) {
  64       __asm __volatile (
  65         ".set      push                                      \n\t"
  66         ".set      noreorder                                 \n\t"
  67         "addsc     %[sum64_lo],   %[sum64_lo],    %[tmp3]    \n\t"
  68         "addwc     %[sum64_hi],   %[sum64_hi],    %[tmp2]    \n\t"
  69         ".set      pop                                       \n\t"
  70         : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
  71         : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
  72         : "hi", "lo", "memory"
  73       );
  74     } else {
  75       if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
  76           ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
  77         // Shift right for overflow.
  78         __asm __volatile (
  79           ".set      push                                             \n\t"
  80           ".set      noreorder                                        \n\t"
  81           "addiu     %[shift_internal], %[shift_internal],  1         \n\t"
  82           "prepend   %[sum64_lo],       %[sum64_hi],        1         \n\t"
  83           "sra       %[sum64_hi],       %[sum64_hi],        1         \n\t"
  84           "prepend   %[tmp3],           %[tmp2],            1         \n\t"
  85           "sra       %[tmp2],           %[tmp2],            1         \n\t"
  86           "addsc     %[sum64_lo],       %[sum64_lo],        %[tmp3]   \n\t"
  87           "addwc     %[sum64_hi],       %[sum64_hi],        %[tmp2]   \n\t"
  88           ".set      pop                                              \n\t"
  89           : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
  90             [shift_internal] "+r" (shift_internal),
  91             [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
  92           :
  93           : "hi", "lo", "memory"
  94         );
  95       } else {
  96         __asm __volatile (
  97           ".set      push                                      \n\t"
  98           ".set      noreorder                                 \n\t"
  99           "addsc     %[sum64_lo],   %[sum64_lo],    %[tmp3]    \n\t"
 100           "addwc     %[sum64_hi],   %[sum64_hi],    %[tmp2]    \n\t"
 101           ".set      pop                                       \n\t"
 102           : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
 103           : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
 104           : "hi", "lo", "memory"
 105         );
 106       }
 107     }
 108   }
 109
 110   for (i = 1; i <= lpc_order; i++) {
 111     tmp_corr_c = corr_coeffs[i];
 112     int16_t* tmp_a_poly_j = &a_polynomial[i];
 113     int16_t* tmp_a_poly_j_i = &a_polynomial[0];
 114     for (j = i; j <= lpc_order; j++) {
 115       // For the case of i = 1 .. lpc_order:
 116       //   residual_energy +=
 117       //     a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i] * 2;
 118
 119       int32_t tmp2, tmp3;
 120       int16_t sign_1;
 121       int16_t sign_2;
 122       int16_t sign_3;
 123
 124       __asm __volatile (
 125         ".set      push                                                   \n\t"
 126         ".set      noreorder                                              \n\t"
 127         "lh        %[tmp3],           0(%[tmp_a_poly_j])                  \n\t"
 128         "lh        %[tmp2],           0(%[tmp_a_poly_j_i])                \n\t"
 129         "addiu     %[tmp_a_poly_j],   %[tmp_a_poly_j],    2               \n\t"
 130         "addiu     %[tmp_a_poly_j_i], %[tmp_a_poly_j_i],  2               \n\t"
 131         "mul       %[tmp32],          %[tmp3],            %[tmp2]         \n\t"
 132         "sll       %[tmp32],          %[tmp32],           1               \n\t"
 133         "mult      $ac0,              %[tmp32],           %[tmp_corr_c]   \n\t"
 134         "shilov    $ac0,              %[shift_internal]                   \n\t"
 135         "mfhi      %[tmp2],           $ac0                                \n\t"
 136         "mflo      %[tmp3],           $ac0                                \n\t"
 137         "sra       %[sign_1],         %[tmp2],            31              \n\t"
 138         "sra       %[sign_2],         %[sum64_hi],        31              \n\t"
 139         "xor       %[sign_3],         %[sign_1],          %[sign_2]       \n\t"
 140         ".set      pop                                                    \n\t"
 141         : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
 142           [tmp_a_poly_j] "+r" (tmp_a_poly_j), [sign_1] "=&r" (sign_1),
 143           [tmp_a_poly_j_i] "+r" (tmp_a_poly_j_i), [sign_2] "=&r" (sign_2),
 144           [sign_3] "=&r" (sign_3), [sum64_hi] "+r" (sum64_hi),
 145           [sum64_lo] "+r" (sum64_lo)
 146         : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
 147         : "hi", "lo", "memory"
 148       );
 149       if (sign_3 != 0) {
 150         __asm __volatile (
 151           ".set      push                                     \n\t"
 152           ".set      noreorder                                \n\t"
 153           "addsc     %[sum64_lo],   %[sum64_lo],   %[tmp3]    \n\t"
 154           "addwc     %[sum64_hi],   %[sum64_hi],   %[tmp2]    \n\t"
 155           ".set      pop                                      \n\t"
 156           : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), [sum64_hi] "+r" (sum64_hi),
 157             [sum64_lo] "+r" (sum64_lo)
 158           :
 159           :"memory"
 160         );
 161       } else {
 162         // Test overflow and sum the result.
 163         if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
 164             ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
 165           // Shift right for overflow.
 166           __asm __volatile (
 167             ".set      push                                              \n\t"
 168             ".set      noreorder                                         \n\t"
 169             "addiu     %[shift_internal],  %[shift_internal],  1         \n\t"
 170             "prepend   %[sum64_lo],        %[sum64_hi],        1         \n\t"
 171             "sra       %[sum64_hi],        %[sum64_hi],        1         \n\t"
 172             "prepend   %[tmp3],            %[tmp2],            1         \n\t"
 173             "sra       %[tmp2],            %[tmp2],            1         \n\t"
 174             "addsc     %[sum64_lo],        %[sum64_lo],        %[tmp3]   \n\t"
 175             "addwc     %[sum64_hi],        %[sum64_hi],        %[tmp2]   \n\t"
 176             ".set      pop                                               \n\t"
 177             : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
 178               [shift_internal] "+r" (shift_internal),
 179               [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
 180             :
 181             : "hi", "lo", "memory"
 182           );
 183         } else {
 184           __asm __volatile (
 185             ".set      push                                      \n\t"
 186             ".set      noreorder                                 \n\t"
 187             "addsc     %[sum64_lo],    %[sum64_lo],   %[tmp3]    \n\t"
 188             "addwc     %[sum64_hi],    %[sum64_hi],   %[tmp2]    \n\t"
 189             ".set      pop                                       \n\t"
 190             : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
 191               [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
 192             :
 193             : "hi", "lo", "memory"
 194           );
 195         }
 196       }
 197     }
 198   }
 199   word32_high = sum64_hi;
 200   word32_low = sum64_lo;
 201
 202   // Calculate the value of shifting (shift_norm) for the 64-bit sum.
 203   if (word32_high != 0) {
 204     shift_norm = 32 - WebRtcSpl_NormW32(word32_high);
 205     int tmp1;
 206     __asm __volatile (
 207       ".set    push                                                     \n\t"
 208       ".set    noreorder                                                \n\t"
 209       "srl     %[residual_energy],  %[sum64_lo],         %[shift_norm]  \n\t"
 210       "li      %[tmp1],             32                                  \n\t"
 211       "subu    %[tmp1],             %[tmp1],             %[shift_norm]  \n\t"
 212       "sll     %[tmp1],             %[sum64_hi],         %[tmp1]        \n\t"
 213       "or      %[residual_energy],  %[residual_energy],  %[tmp1]        \n\t"
 214       ".set    pop                                                      \n\t"
 215       : [residual_energy] "=&r" (residual_energy), [tmp1]"=&r"(tmp1),
 216         [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
 217       : [shift_norm] "r" (shift_norm)
 218       : "memory"
 219     );
 220   } else {
 221     if ((word32_low & 0x80000000) != 0) {
 222       shift_norm = 1;
 223       residual_energy = (uint32_t)word32_low >> 1;
 224     } else {
 225       shift_norm = WebRtcSpl_NormW32(word32_low);
 226       residual_energy = word32_low << shift_norm;
 227       shift_norm = -shift_norm;
 228     }
 229   }
 230
 231   // Q(q_val_polynomial * 2) * Q(q_val_corr) >> shift_internal >> shift_norm
 232   //   = Q(q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2)
 233   *q_val_residual_energy =
 234       q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2;
 235
 236   return residual_energy;
 237 }