src/third_party/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding_mips.c

   1 /*
   2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h"
  12 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
  13
  14 // MIPS optimization of the function WebRtcIsacfix_MatrixProduct1.
  15 // Bit-exact with the function WebRtcIsacfix_MatrixProduct1C from
  16 // entropy_coding.c file.
     //
     // For positive loop counts, writes SUBFRAMES * |mid_loop_count| consecutive
     // 32-bit sums to |matrix_product|. Each sum accumulates |inner_loop_count|
     // terms; a term is a 16-bit |matrix0| element multiplied by a 32-bit
     // |matrix1| element that has first been shifted left by |shift|. The
     // multiply is done in two halves: (high 16 bits) * m0 is added as is, and
     // ((low 16 bits) >> 1) * m0 is rounded and shifted right by 15 before being
     // added.
     //
     // Parameters:
     //   matrix0                  16-bit factors (read with lh).
     //   matrix1                  32-bit factors (read with lw).
     //   matrix_product           Output buffer for the 32-bit sums.
     //   matrix1_index_factor1    Element stride applied to the matrix1 start
     //                            position (see the two cases below).
     //   matrix0_index_factor1    Element stride applied to the matrix0 start
     //                            position (see the two cases below).
     //   matrix1_index_init_case  Non-zero selects the first code path, zero the
     //                            second; they differ in which start position is
     //                            rewound on every outer iteration.
     //   matrix1_index_step       Element stride of matrix1 between terms.
     //   matrix0_index_step       Element stride of matrix0 between terms.
     //   inner_loop_count         Number of terms accumulated per output.
     //   mid_loop_count           Number of outputs per outer iteration.
     //   shift                    Left shift applied to each matrix1 element.
     //
     // Note: all three assembly loops test their counter at the bottom, so each
     // loop body executes at least once.
  17 void WebRtcIsacfix_MatrixProduct1MIPS(const int16_t matrix0[],
  18                                       const int32_t matrix1[],
  19                                       int32_t matrix_product[],
  20                                       const int matrix1_index_factor1,
  21                                       const int matrix0_index_factor1,
  22                                       const int matrix1_index_init_case,
  23                                       const int matrix1_index_step,
  24                                       const int matrix0_index_step,
  25                                       const int inner_loop_count,
  26                                       const int mid_loop_count,
  27                                       const int shift) {
  28   if (matrix1_index_init_case != 0) {
         // In this case the matrix1 start position is rewound to |matrix1| at
         // the top of every outer iteration and advanced by
         // |matrix1_index_factor1| elements after each output, while the matrix0
         // start position advances by |matrix0_index_factor1| elements after
         // each outer iteration.
  29     int j = SUBFRAMES, k = 0, n = 0;
  30     int32_t r0, r1, r2, sum32;
  31     int32_t* product_start = matrix_product;
  32     int32_t* product_ptr;
         // Element strides scaled to byte offsets (2 bytes per int16_t, 4 bytes
         // per int32_t) so they can be added directly to addresses.
         // |product_step| also seeds the middle-loop byte counter k.
  33     const uint32_t product_step = 4 * mid_loop_count;
  34     const uint32_t matrix0_step = 2 * matrix0_index_step;
  35     const uint32_t matrix1_step = 4 * matrix1_index_step;
  36     const uint32_t matrix0_step2 = 2 * matrix0_index_factor1;
  37     const uint32_t matrix1_step2 = 4 * matrix1_index_factor1;
  38     const int16_t* matrix0_start = matrix0;
  39     const int32_t* matrix1_start = matrix1;
  40     int16_t* matrix0_ptr;
  41     int32_t* matrix1_ptr;
  42
         // With .set noreorder, the instruction written directly after each
         // bgtz (indented by one extra space) sits in the branch delay slot and
         // executes whether or not the branch is taken.
  43     __asm __volatile (
  44       ".set     push                                                       \n\t"
  45       ".set     noreorder                                                  \n\t"
           // 1: outer loop, j counts down from SUBFRAMES.
  46      "1:                                                                   \n\t"
  47       "addu     %[product_ptr],     %[product_start],     $0               \n\t"
  48       "addu     %[k],               %[product_step],      $0               \n\t"
  49       "addiu    %[j],               %[j],                 -1               \n\t"
  50       "addu     %[matrix1_start],   %[matrix1],           $0               \n\t"
           // 2: middle loop, k counts the remaining output bytes (4 per sum).
  51      "2:                                                                   \n\t"
  52       "addu     %[matrix1_ptr],     %[matrix1_start],     $0               \n\t"
  53       "addu     %[matrix0_ptr],     %[matrix0_start],     $0               \n\t"
  54       "addu     %[n],               %[inner_loop_count],  $0               \n\t"
           // sum32 = 0 * 0, i.e. clear the accumulator.
  55       "mul      %[sum32],           $0,                   $0               \n\t"
           // 3: inner loop, one term per pass. r0 = matrix1 word (then shifted
           // left by |shift|), r1 = matrix0 halfword.
  56      "3:                                                                   \n\t"
  57       "lw       %[r0],              0(%[matrix1_ptr])                      \n\t"
  58       "lh       %[r1],              0(%[matrix0_ptr])                      \n\t"
  59       "addu     %[matrix1_ptr],     %[matrix1_ptr],       %[matrix1_step]  \n\t"
  60       "sllv     %[r0],              %[r0],                %[shift]         \n\t"
           // r2 = ((r0 & 0xffff) >> 1) * r1 and r0 = (r0 >> 16) * r1.
  61       "andi     %[r2],              %[r0],                0xffff           \n\t"
  62       "sra      %[r2],              %[r2],                1                \n\t"
  63       "mul      %[r2],              %[r2],                %[r1]            \n\t"
  64       "sra      %[r0],              %[r0],                16               \n\t"
  65       "mul      %[r0],              %[r0],                %[r1]            \n\t"
  66       "addu     %[matrix0_ptr],     %[matrix0_ptr],       %[matrix0_step]  \n\t"
  67       "addiu    %[n],               %[n],                 -1               \n\t"
           // Round the low-half product and shift it right by 15; the DSP
           // build uses a single shra_r.w instead of the add/shift pair.
  68 #if defined(MIPS_DSP_R1_LE)
  69       "shra_r.w %[r2],              %[r2],                15               \n\t"
  70 #else
  71       "addiu    %[r2],              %[r2],                0x4000           \n\t"
  72       "sra      %[r2],              %[r2],                15               \n\t"
  73 #endif
  74       "addu     %[sum32],           %[sum32],             %[r2]            \n\t"
  75       "bgtz     %[n],               3b                                     \n\t"
  76       " addu    %[sum32],           %[sum32],             %[r0]            \n\t"
           // End of one output: advance the matrix1 start position, store the
           // sum, and step the output pointer in the delay slot.
  77       "addiu    %[k],               %[k],                 -4               \n\t"
  78       "addu     %[matrix1_start],   %[matrix1_start],     %[matrix1_step2] \n\t"
  79       "sw       %[sum32],           0(%[product_ptr])                      \n\t"
  80       "bgtz     %[k],               2b                                     \n\t"
  81       " addiu   %[product_ptr],     %[product_ptr],       4                \n\t"
           // End of one outer iteration: advance the matrix0 start position and
           // move the output base forward by |product_step| bytes.
  82       "addu     %[matrix0_start],   %[matrix0_start],     %[matrix0_step2] \n\t"
  83       "bgtz     %[j],               1b                                     \n\t"
  84       " addu    %[product_start],   %[product_start],     %[product_step]  \n\t"
  85       ".set     pop                                                        \n\t"
           // "+r" operands are read and written; "=&r" operands are
           // early-clobber outputs whose incoming values are not used.
  86       : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start),
  87         [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "=&r"(matrix1_start),
  88         [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
  89         [matrix0_start] "+r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0),
  90         [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2)
  91       : [product_step] "r" (product_step), [matrix1] "r" (matrix1),
  92         [inner_loop_count] "r" (inner_loop_count),
  93         [matrix1_step] "r" (matrix1_step), [shift] "r" (shift),
  94         [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2),
  95         [matrix0_step2] "r" (matrix0_step2)
  96       : "hi", "lo", "memory"
  97     );
  98   } else {
         // In this case the roles are swapped: the matrix0 start position is
         // rewound to |matrix0| at the top of every outer iteration and advanced
         // by |matrix0_index_factor1| elements after each output, while the
         // matrix1 start position advances by |matrix1_index_factor1| elements
         // after each outer iteration.
  99     int j = SUBFRAMES, k = 0, n = 0;
  100     int32_t r0, r1, r2, sum32;
  101     int32_t* product_start = matrix_product;
  102     int32_t* product_ptr;
          // Element strides scaled to byte offsets, as in the branch above.
  103     const uint32_t product_step = 4 * mid_loop_count;
  104     const uint32_t matrix0_step = 2 * matrix0_index_step;
  105     const uint32_t matrix1_step = 4 * matrix1_index_step;
  106     const uint32_t matrix0_step2 = 2 * matrix0_index_factor1;
  107     const uint32_t matrix1_step2 = 4 * matrix1_index_factor1;
  108     const int16_t* matrix0_start = matrix0;
  109     const int32_t* matrix1_start = matrix1;
  110     int16_t* matrix0_ptr;
  111     int32_t* matrix1_ptr;
  112
          // Same loop nest as above; the instruction after each bgtz is its
          // delay slot. Only the start-pointer updates differ.
  113     __asm __volatile (
  114       ".set     push                                                       \n\t"
  115       ".set     noreorder                                                  \n\t"
            // 1: outer loop, j counts down from SUBFRAMES.
  116      "1:                                                                   \n\t"
  117       "addu     %[product_ptr],     %[product_start],     $0               \n\t"
  118       "addu     %[k],               %[product_step],      $0               \n\t"
  119       "addiu    %[j],               %[j],                 -1               \n\t"
  120       "addu     %[matrix0_start],   %[matrix0],           $0               \n\t"
            // 2: middle loop, k counts the remaining output bytes (4 per sum).
  121      "2:                                                                   \n\t"
  122       "addu     %[matrix1_ptr],     %[matrix1_start],     $0               \n\t"
  123       "addu     %[matrix0_ptr],     %[matrix0_start],     $0               \n\t"
  124       "addu     %[n],               %[inner_loop_count],  $0               \n\t"
            // sum32 = 0 * 0, i.e. clear the accumulator.
  125       "mul      %[sum32],           $0,                   $0               \n\t"
            // 3: inner loop, one term per pass.
  126      "3:                                                                   \n\t"
  127       "lw       %[r0],              0(%[matrix1_ptr])                      \n\t"
  128       "lh       %[r1],              0(%[matrix0_ptr])                      \n\t"
  129       "addu     %[matrix1_ptr],     %[matrix1_ptr],       %[matrix1_step]  \n\t"
  130       "sllv     %[r0],              %[r0],                %[shift]         \n\t"
            // r2 = ((r0 & 0xffff) >> 1) * r1 and r0 = (r0 >> 16) * r1.
  131       "andi     %[r2],              %[r0],                0xffff           \n\t"
  132       "sra      %[r2],              %[r2],                1                \n\t"
  133       "mul      %[r2],              %[r2],                %[r1]            \n\t"
  134       "sra      %[r0],              %[r0],                16               \n\t"
  135       "mul      %[r0],              %[r0],                %[r1]            \n\t"
  136       "addu     %[matrix0_ptr],     %[matrix0_ptr],       %[matrix0_step]  \n\t"
  137       "addiu    %[n],               %[n],                 -1               \n\t"
            // Round the low-half product and shift it right by 15.
  138 #if defined(MIPS_DSP_R1_LE)
  139       "shra_r.w %[r2],              %[r2],                15               \n\t"
  140 #else
  141       "addiu    %[r2],              %[r2],                0x4000           \n\t"
  142       "sra      %[r2],              %[r2],                15               \n\t"
  143 #endif
  144       "addu     %[sum32],           %[sum32],             %[r2]            \n\t"
  145       "bgtz     %[n],               3b                                     \n\t"
  146       " addu    %[sum32],           %[sum32],             %[r0]            \n\t"
            // End of one output: advance the matrix0 start position, store the
            // sum, and step the output pointer in the delay slot.
  147       "addiu    %[k],               %[k],                 -4               \n\t"
  148       "addu     %[matrix0_start],   %[matrix0_start],     %[matrix0_step2] \n\t"
  149       "sw       %[sum32],           0(%[product_ptr])                      \n\t"
  150       "bgtz     %[k],               2b                                     \n\t"
  151       " addiu   %[product_ptr],     %[product_ptr],       4                \n\t"
            // End of one outer iteration: advance the matrix1 start position and
            // move the output base forward by |product_step| bytes.
  152       "addu     %[matrix1_start],   %[matrix1_start],     %[matrix1_step2] \n\t"
  153       "bgtz     %[j],               1b                                     \n\t"
  154       " addu    %[product_start],   %[product_start],     %[product_step]  \n\t"
  155       ".set     pop                                                        \n\t"
            // Here matrix1_start is the read-write ("+r") operand and
            // matrix0_start is an early-clobber output, mirroring the swap.
  156       : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start),
  157         [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "+r"(matrix1_start),
  158         [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
  159         [matrix0_start] "=&r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0),
  160         [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2)
  161       : [product_step] "r" (product_step), [matrix0] "r" (matrix0),
  162         [inner_loop_count] "r" (inner_loop_count),
  163         [matrix1_step] "r" (matrix1_step), [shift] "r" (shift),
  164         [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2),
  165         [matrix0_step2] "r" (matrix0_step2)
  166       : "hi", "lo", "memory"
  167     );
  168   }
  169 }
 170
  171 // MIPS optimization of the function WebRtcIsacfix_MatrixProduct2.
  172 // Bit-exact with the function WebRtcIsacfix_MatrixProduct2C from
  173 // entropy_coding.c file.
      //
      // Writes SUBFRAMES pairs of 32-bit results to |matrix_product|. For each
      // pair, two sums are accumulated side by side over SUBFRAMES terms: every
      // term multiplies one 16-bit |matrix0| element with two consecutive 32-bit
      // |matrix1| words, one product going to each sum. The multiply is split
      // into halves: (high 16 bits) * m0 is added as is, and
      // ((low 16 bits) >> 1) * m0 is rounded and shifted right by 15 before being
      // added. Each finished sum is arithmetically shifted right by 3 and stored.
      //
      // Parameters:
      //   matrix0               16-bit factors (read with lh).
      //   matrix1               32-bit factors, read two words per term; reading
      //                         restarts at |matrix1| for every output pair.
      //   matrix_product        Output buffer; two words are written per outer
      //                         iteration.
      //   matrix0_index_factor  Element stride of the matrix0 start position
      //                         between outer iterations.
      //   matrix0_index_step    Element stride of matrix0 between terms.
      //
      // Note: both loops test their counter at the bottom, so each loop body
      // executes at least once.
  174 void WebRtcIsacfix_MatrixProduct2MIPS(const int16_t matrix0[],
  175                                       const int32_t matrix1[],
  176                                       int32_t matrix_product[],
  177                                       const int matrix0_index_factor,
  178                                       const int matrix0_index_step) {
  179   int j = 0, n = 0;
  180   int loop_count = SUBFRAMES;
  181   const int16_t* matrix0_ptr;
  182   const int32_t* matrix1_ptr;
  183   const int16_t* matrix0_start = matrix0;
        // Element strides scaled to byte offsets (2 bytes per int16_t).
  184   const int matrix0_step = 2 * matrix0_index_step;
  185   const int matrix0_step2 = 2 * matrix0_index_factor;
  186   int32_t r0, r1, r2, r3, r4, sum32, sum32_2;
  187
        // With .set noreorder, the instruction written directly after each bgtz
        // (indented by one extra space) sits in the branch delay slot and
        // executes whether or not the branch is taken.
  188   __asm __volatile (
  189     ".set       push                                                   \n\t"
  190     ".set       noreorder                                              \n\t"
          // j and matrix0_start are (re)initialized here, so the values given
          // to them in C above are not what the loops consume.
  191     "addu       %[j],              %[loop_count],     $0               \n\t"
  192     "addu       %[matrix0_start],  %[matrix0],        $0               \n\t"
          // 1: outer loop, one output pair per pass.
  193    "1:                                                                 \n\t"
  194     "addu       %[matrix1_ptr],    %[matrix1],        $0               \n\t"
  195     "addu       %[matrix0_ptr],    %[matrix0_start],  $0               \n\t"
  196     "addu       %[n],              %[loop_count],     $0               \n\t"
          // Clear both accumulators (0 * 0).
  197     "mul        %[sum32],          $0,                $0               \n\t"
  198     "mul        %[sum32_2],        $0,                $0               \n\t"
          // 2: inner loop, one term per pass. r0/r1 = two consecutive matrix1
          // words, r2 = matrix0 halfword shared by both products.
  199    "2:                                                                 \n\t"
  200     "lw         %[r0],             0(%[matrix1_ptr])                   \n\t"
  201     "lw         %[r1],             4(%[matrix1_ptr])                   \n\t"
  202     "lh         %[r2],             0(%[matrix0_ptr])                   \n\t"
          // r3/r4 = low-half products, r0/r1 = high-half products.
  203     "andi       %[r3],             %[r0],             0xffff           \n\t"
  204     "sra        %[r3],             %[r3],             1                \n\t"
  205     "mul        %[r3],             %[r3],             %[r2]            \n\t"
  206     "andi       %[r4],             %[r1],             0xffff           \n\t"
  207     "sra        %[r4],             %[r4],             1                \n\t"
  208     "mul        %[r4],             %[r4],             %[r2]            \n\t"
  209     "sra        %[r0],             %[r0],             16               \n\t"
  210     "mul        %[r0],             %[r0],             %[r2]            \n\t"
  211     "sra        %[r1],             %[r1],             16               \n\t"
  212     "mul        %[r1],             %[r1],             %[r2]            \n\t"
          // Round the low-half products and shift them right by 15; the DSP
          // build uses shra_r.w instead of the add/shift pairs.
  213 #if defined(MIPS_DSP_R1_LE)
  214     "shra_r.w   %[r3],             %[r3],             15               \n\t"
  215     "shra_r.w   %[r4],             %[r4],             15               \n\t"
  216 #else
  217     "addiu      %[r3],             %[r3],             0x4000           \n\t"
  218     "sra        %[r3],             %[r3],             15               \n\t"
  219     "addiu      %[r4],             %[r4],             0x4000           \n\t"
  220     "sra        %[r4],             %[r4],             15               \n\t"
  221 #endif
          // Advance matrix1 by two words (8 bytes) and matrix0 by one step,
          // then accumulate; the last add runs in the delay slot.
  222     "addiu      %[matrix1_ptr],    %[matrix1_ptr],    8                \n\t"
  223     "addu       %[matrix0_ptr],    %[matrix0_ptr],    %[matrix0_step]  \n\t"
  224     "addiu      %[n],              %[n],              -1               \n\t"
  225     "addu       %[sum32],          %[sum32],          %[r3]            \n\t"
  226     "addu       %[sum32_2],        %[sum32_2],        %[r4]            \n\t"
  227     "addu       %[sum32],          %[sum32],          %[r0]            \n\t"
  228     "bgtz       %[n],              2b                                  \n\t"
  229     " addu      %[sum32_2],        %[sum32_2],        %[r1]            \n\t"
          // Scale both sums by >> 3, store the pair, and advance the output
          // pointer by 8 bytes in the delay slot.
  230     "sra        %[sum32],          %[sum32],          3                \n\t"
  231     "sra        %[sum32_2],        %[sum32_2],        3                \n\t"
  232     "addiu      %[j],              %[j],              -1               \n\t"
  233     "addu       %[matrix0_start],  %[matrix0_start],  %[matrix0_step2] \n\t"
  234     "sw         %[sum32],          0(%[matrix_product])                \n\t"
  235     "sw         %[sum32_2],        4(%[matrix_product])                \n\t"
  236     "bgtz       %[j],              1b                                  \n\t"
  237     " addiu     %[matrix_product], %[matrix_product], 8                \n\t"
  238     ".set       pop                                                    \n\t"
          // matrix_product is the only read-write ("+r") operand; the rest of
          // the outputs are early-clobber scratch registers.
  239     : [j] "=&r" (j), [matrix0_start] "=&r" (matrix0_start),
  240       [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
  241       [n] "=&r" (n), [sum32] "=&r" (sum32), [sum32_2] "=&r" (sum32_2),
  242       [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
  243       [r4] "=&r" (r4), [matrix_product] "+r" (matrix_product)
  244     : [loop_count] "r" (loop_count), [matrix0] "r" (matrix0),
  245       [matrix1] "r" (matrix1), [matrix0_step] "r" (matrix0_step),
  246       [matrix0_step2] "r" (matrix0_step2)
  247     : "hi", "lo", "memory"
  248   );
  249 }