2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
11 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h"
13 #ifdef WEBRTC_ARCH_ARM_NEON
17 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
18 #include "webrtc/system_wrappers/interface/compile_assert_c.h"
20 extern int32_t WebRtcIsacfix_Log2Q8(uint32_t x);
22 void WebRtcIsacfix_PCorr2Q32(const int16_t* in, int32_t* logcorQ8) {
24 int32_t ysum32,csum32, lys, lcs;
29 oneQ8 = WEBRTC_SPL_LSHIFT_W32((int32_t)1, 8); // 1.00 in Q8
31 x = in + PITCH_MAX_LAG / 2 + 2;
32 scaling = WebRtcSpl_GetScalingSquare((int16_t*)in,
37 x = in + PITCH_MAX_LAG / 2 + 2;
38 for (n = 0; n < PITCH_CORR_LEN2; n++) {
39 ysum32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)in[n],
42 csum32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)x[n],
46 logcorQ8 += PITCH_LAG_SPAN2 - 1;
47 lys = WebRtcIsacfix_Log2Q8((uint32_t)ysum32); // Q8
48 lys = WEBRTC_SPL_RSHIFT_W32(lys, 1); //sqrt(ysum);
50 lcs = WebRtcIsacfix_Log2Q8((uint32_t)csum32); // 2log(csum) in Q8
51 if (lcs > (lys + oneQ8)) { // csum/sqrt(ysum) > 2 in Q8
52 *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum))
54 *logcorQ8 = oneQ8; // 1.00
61 for (k = 1; k < PITCH_LAG_SPAN2; k++) {
63 ysum32 -= WEBRTC_SPL_MUL_16_16_RSFT((int16_t)in[k - 1],
66 ysum32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)in[PITCH_CORR_LEN2 + k - 1],
67 (int16_t)in[PITCH_CORR_LEN2 + k - 1],
69 #ifdef WEBRTC_ARCH_ARM_NEON
72 int32x4_t int_32x4_sum = vmovq_n_s32(0);
73 // Can't shift a Neon register to right with a non-constant shift value.
74 int32x4_t int_32x4_scale = vdupq_n_s32(-scaling);
75 // Assert a condition used in loop unrolling at compile-time.
76 COMPILE_ASSERT(PITCH_CORR_LEN2 %4 == 0);
78 for (n = 0; n < PITCH_CORR_LEN2; n += 4) {
79 int16x4_t int_16x4_x = vld1_s16(&x[n]);
80 int16x4_t int_16x4_in = vld1_s16(&inptr[n]);
81 int32x4_t int_32x4 = vmull_s16(int_16x4_x, int_16x4_in);
82 int_32x4 = vshlq_s32(int_32x4, int_32x4_scale);
83 int_32x4_sum = vaddq_s32(int_32x4_sum, int_32x4);
86 // Use vector store to avoid long stall from data transferring
87 // from vector to general register.
88 vst1q_s32(vbuff, int_32x4_sum);
89 csum32 = vbuff[0] + vbuff[1];
96 for (n = 0; n < PITCH_CORR_LEN2; n++) {
97 csum32 += x[n] * inptr[n];
100 for (n = 0; n < PITCH_CORR_LEN2; n++) {
101 csum32 += (x[n] * inptr[n]) >> scaling;
108 lys = WebRtcIsacfix_Log2Q8((uint32_t)ysum32); // Q8
109 lys = WEBRTC_SPL_RSHIFT_W32(lys, 1); //sqrt(ysum);
112 lcs = WebRtcIsacfix_Log2Q8((uint32_t)csum32); // 2log(csum) in Q8
113 if (lcs > (lys + oneQ8)) { // csum/sqrt(ysum) > 2
114 *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum))
116 *logcorQ8 = oneQ8; // 1.00