2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
11 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h"
13 // MIPS DSPR2 optimization for function WebRtcIsacfix_CalculateResidualEnergy
14 // Bit-exact with WebRtcIsacfix_CalculateResidualEnergyC from file
15 // lpc_masking_model.c
// Accumulates the residual energy as a 64-bit sum (sum64_hi:sum64_lo) of
//   a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i]
// terms (each term doubled for i >= 1), then normalizes the sum to 32 bits.
// Returns the normalized residual energy and writes its Q-domain to
// *q_val_residual_energy.
16 int32_t WebRtcIsacfix_CalculateResidualEnergyMIPS(int lpc_order,
19 int16_t* a_polynomial,
21 int* q_val_residual_energy) {
// shift_internal counts the right shifts applied while accumulating;
// shift_norm is the shift applied by the final normalization.
24 int shift_internal = 0, shift_norm = 0;
25 int32_t tmp32 = 0, word32_high = 0, word32_low = 0, residual_energy = 0;
// Correlation coefficient for the i == 0 pass; tmp_a_poly walks a_polynomial.
26 int32_t tmp_corr_c = corr_coeffs[0];
27 int16_t* tmp_a_poly = &a_polynomial[0];
31 for (j = 0; j <= lpc_order; j++) {
32 // For the case of i == 0:
34 // a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i];
// tmp32 = a_polynomial[j]^2; $ac0 = tmp32 * tmp_corr_c, shifted by
// shift_internal. The 64-bit result is read back as tmp2 (high word) and
// tmp3 (low word). sign_1 and sign_2 are the sign masks of the product and
// of the running sum; sign_3 is non-zero when those signs differ.
44 "lh %[tmp2], 0(%[tmp_a_poly]) \n\t"
45 "mul %[tmp32], %[tmp2], %[tmp2] \n\t"
46 "addiu %[tmp_a_poly], %[tmp_a_poly], 2 \n\t"
47 "sra %[sign_2], %[sum64_hi], 31 \n\t"
48 "mult $ac0, %[tmp32], %[tmp_corr_c] \n\t"
49 "shilov $ac0, %[shift_internal] \n\t"
50 "mfhi %[tmp2], $ac0 \n\t"
51 "mflo %[tmp3], $ac0 \n\t"
52 "sra %[sign_1], %[tmp2], 31 \n\t"
53 "xor %[sign_3], %[sign_1], %[sign_2] \n\t"
55 : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
56 [tmp_a_poly] "+r" (tmp_a_poly), [sign_1] "=&r" (sign_1),
57 [sign_3] "=&r" (sign_3), [sign_2] "=&r" (sign_2),
58 [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
59 : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
60 : "hi", "lo", "memory"
// 64-bit add of tmp2:tmp3 into sum64_hi:sum64_lo: addsc adds the low words
// and records the carry, addwc adds the high words plus that carry.
67 "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
68 "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
70 : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
71 : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
72 : "hi", "lo", "memory"
// Both operands non-negative or both negative: check whether adding the
// high words would overflow 32 bits.
75 if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
76 ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
77 // Shift right for overflow.
// Count one more internal shift, then halve both 64-bit values before
// adding: prepend shifts the low word right by one while taking in the low
// bit of the high word, and sra halves the high word keeping its sign.
81 "addiu %[shift_internal], %[shift_internal], 1 \n\t"
82 "prepend %[sum64_lo], %[sum64_hi], 1 \n\t"
83 "sra %[sum64_hi], %[sum64_hi], 1 \n\t"
84 "prepend %[tmp3], %[tmp2], 1 \n\t"
85 "sra %[tmp2], %[tmp2], 1 \n\t"
86 "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
87 "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
89 : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
90 [shift_internal] "+r" (shift_internal),
91 [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
93 : "hi", "lo", "memory"
// 64-bit add with carry of the product into the running sum.
99 "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
100 "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
102 : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
103 : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
104 : "hi", "lo", "memory"
// Passes for i = 1 .. lpc_order, one correlation coefficient per pass.
110 for (i = 1; i <= lpc_order; i++) {
111 tmp_corr_c = corr_coeffs[i];
// tmp_a_poly_j walks a_polynomial[j] and tmp_a_poly_j_i walks
// a_polynomial[j - i]; both advance one element per inner iteration.
112 int16_t* tmp_a_poly_j = &a_polynomial[i];
113 int16_t* tmp_a_poly_j_i = &a_polynomial[0];
114 for (j = i; j <= lpc_order; j++) {
115 // For the case of i = 1 .. lpc_order:
116 // residual_energy +=
117 // a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i] * 2;
// tmp32 = 2 * a_polynomial[j] * a_polynomial[j - i] (the sll by 1 is the
// "* 2"); $ac0 = tmp32 * tmp_corr_c, shifted by shift_internal and read
// back as tmp2 (high word) / tmp3 (low word). sign_1, sign_2 and sign_3
// have the same meaning as in the i == 0 pass.
126 ".set noreorder \n\t"
127 "lh %[tmp3], 0(%[tmp_a_poly_j]) \n\t"
128 "lh %[tmp2], 0(%[tmp_a_poly_j_i]) \n\t"
129 "addiu %[tmp_a_poly_j], %[tmp_a_poly_j], 2 \n\t"
130 "addiu %[tmp_a_poly_j_i], %[tmp_a_poly_j_i], 2 \n\t"
131 "mul %[tmp32], %[tmp3], %[tmp2] \n\t"
132 "sll %[tmp32], %[tmp32], 1 \n\t"
133 "mult $ac0, %[tmp32], %[tmp_corr_c] \n\t"
134 "shilov $ac0, %[shift_internal] \n\t"
135 "mfhi %[tmp2], $ac0 \n\t"
136 "mflo %[tmp3], $ac0 \n\t"
137 "sra %[sign_1], %[tmp2], 31 \n\t"
138 "sra %[sign_2], %[sum64_hi], 31 \n\t"
139 "xor %[sign_3], %[sign_1], %[sign_2] \n\t"
141 : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
142 [tmp_a_poly_j] "+r" (tmp_a_poly_j), [sign_1] "=&r" (sign_1),
143 [tmp_a_poly_j_i] "+r" (tmp_a_poly_j_i), [sign_2] "=&r" (sign_2),
144 [sign_3] "=&r" (sign_3), [sum64_hi] "+r" (sum64_hi),
145 [sum64_lo] "+r" (sum64_lo)
146 : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
147 : "hi", "lo", "memory"
// 64-bit add with carry of tmp2:tmp3 into sum64_hi:sum64_lo.
152 ".set noreorder \n\t"
153 "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
154 "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
156 : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), [sum64_hi] "+r" (sum64_hi),
157 [sum64_lo] "+r" (sum64_lo)
162 // Test overflow and sum the result.
163 if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
164 ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
165 // Shift right for overflow.
// Count one more internal shift, halve the running sum and the product as
// 64-bit values (prepend + sra), then add them with carry.
168 ".set noreorder \n\t"
169 "addiu %[shift_internal], %[shift_internal], 1 \n\t"
170 "prepend %[sum64_lo], %[sum64_hi], 1 \n\t"
171 "sra %[sum64_hi], %[sum64_hi], 1 \n\t"
172 "prepend %[tmp3], %[tmp2], 1 \n\t"
173 "sra %[tmp2], %[tmp2], 1 \n\t"
174 "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
175 "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
177 : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
178 [shift_internal] "+r" (shift_internal),
179 [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
181 : "hi", "lo", "memory"
// 64-bit add with carry of the product into the running sum.
186 ".set noreorder \n\t"
187 "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
188 "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
190 : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
191 [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
193 : "hi", "lo", "memory"
// Copy the two halves of the 64-bit sum for the normalization below.
199 word32_high = sum64_hi;
200 word32_low = sum64_lo;
202 // Calculate the value of shifting (shift_norm) for the 64-bit sum.
203 if (word32_high != 0) {
// NOTE(review): WebRtcSpl_NormW32 is defined elsewhere; presumably it returns
// the left-shift count that normalizes a 32-bit value - confirm.
204 shift_norm = 32 - WebRtcSpl_NormW32(word32_high);
// residual_energy =
//     (sum64_lo >> shift_norm) | (sum64_hi << (32 - shift_norm)),
// with the low word shifted logically (srl).
208 ".set noreorder \n\t"
209 "srl %[residual_energy], %[sum64_lo], %[shift_norm] \n\t"
210 "li %[tmp1], 32 \n\t"
211 "subu %[tmp1], %[tmp1], %[shift_norm] \n\t"
212 "sll %[tmp1], %[sum64_hi], %[tmp1] \n\t"
213 "or %[residual_energy], %[residual_energy], %[tmp1] \n\t"
215 : [residual_energy] "=&r" (residual_energy), [tmp1]"=&r"(tmp1),
216 [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
217 : [shift_norm] "r" (shift_norm)
// Bit 31 of the low word is set: drop one bit using an unsigned shift.
221 if ((word32_low & 0x80000000) != 0) {
223 residual_energy = (uint32_t)word32_low >> 1;
// Shift the low word left by its normalization count and record that count
// as a negative shift so the Q-domain computed below accounts for it.
225 shift_norm = WebRtcSpl_NormW32(word32_low);
226 residual_energy = word32_low << shift_norm;
227 shift_norm = -shift_norm;
231 // Q(q_val_polynomial * 2) * Q(q_val_corr) >> shift_internal >> shift_norm
232 // = Q(q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2)
233 *q_val_residual_energy =
234 q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2;
236 return residual_energy;