2 * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
13 #include "third_party/googletest/src/include/gtest/gtest.h"
15 #include "./vpx_dsp_rtcd.h"
16 #include "vpx_ports/vpx_timer.h"
18 #include "test/acm_random.h"
19 #include "test/register_state_check.h"
// Bring the ACMRandom generator from the libvpx_test namespace into scope.
23 using ::libvpx_test::ACMRandom;
// Pointer type for the Hadamard implementations under test. The visible part
// of the signature takes the int16_t source pointer and its stride.
// NOTE(review): the rest of the parameter list is not visible here; presumably
// the tran_low_t output pointer, given the h_func_(a, stride, b) calls in this
// file -- confirm.
25 typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride,
// One 8-point pass of the reference Hadamard transform.
// Reads eight inputs spaced 8 elements apart starting at `a` (a[0], a[8], ...,
// a[56]); the 8x8 reference passes `block + i`, so this walks column i of a
// row-major 8x8 block.
// NOTE(review): the declarations of b/c and the final stage that writes `out`
// are not visible in this excerpt -- confirm against the full file.
28 void hadamard_loop(const tran_low_t *a, tran_low_t *out) {
// Stage 1: sum and difference of each adjacent pair of inputs.
30 for (int i = 0; i < 8; i += 2) {
31 b[i + 0] = a[i * 8] + a[(i + 1) * 8];
32 b[i + 1] = a[i * 8] - a[(i + 1) * 8];
// Stage 2: within each group of four, combine the stage-1 values that sit two
// entries apart (sums first, then differences).
35 for (int i = 0; i < 8; i += 4) {
36 c[i + 0] = b[i + 0] + b[i + 2];
37 c[i + 1] = b[i + 1] + b[i + 3];
38 c[i + 2] = b[i + 0] - b[i + 2];
39 c[i + 3] = b[i + 1] - b[i + 3];
// Reference 8x8 Hadamard transform, built from two rounds of hadamard_loop().
//   a        - top-left sample of the 8x8 int16_t source block.
//   a_stride - distance, in elements, between consecutive source rows.
//   b        - destination; pass i of the second round writes from b + i * 8.
// NOTE(review): the declarations of the `input` and `buf` scratch arrays are
// not visible in this excerpt.
51 void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) {
// Gather the strided source into a contiguous row-major 8x8 block, widening
// each sample to tran_low_t.
54 for (int i = 0; i < 8; ++i) {
55 for (int j = 0; j < 8; ++j) {
56 input[i * 8 + j] = static_cast<tran_low_t>(a[i * a_stride + j]);
// First round: column i of `input` is transformed into buf starting at i * 8.
59 for (int i = 0; i < 8; ++i) hadamard_loop(input + i, buf + i * 8);
// Second round: the same step applied to `buf`, written to the caller's buffer.
60 for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, b + i * 8);
// Reference 16x16 Hadamard transform assembled from four 8x8 reference
// transforms, one per quadrant of the source block.
//   a        - top-left sample of the 16x16 int16_t source block.
//   a_stride - distance, in elements, between consecutive source rows.
//   b        - destination; the quadrant results start at b + 0 (top-left),
//              b + 64 (top-right), b + 128 (bottom-left), b + 192
//              (bottom-right).
// The loop then takes one coefficient from each quadrant result and forms
// pairwise sums and differences, each shifted right by one bit.
// NOTE(review): the stores back into b and the pointer advance for the loop
// are not visible in this excerpt -- confirm against the full file.
63 void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
64 /* The source is a 16x16 block. The destination is rearranged to 8x32.
66 reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
67 reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
68 reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
69 reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
71 /* Overlay the 8x8 blocks and combine. */
72 for (int i = 0; i < 64; ++i) {
73 /* 8x8 steps the range up to 15 bits. */
74 const tran_low_t a0 = b[0];
75 const tran_low_t a1 = b[64];
76 const tran_low_t a2 = b[128];
77 const tran_low_t a3 = b[192];
79 /* Prevent the result from escaping int16_t. */
80 const tran_low_t b0 = (a0 + a1) >> 1;
81 const tran_low_t b1 = (a0 - a1) >> 1;
82 const tran_low_t b2 = (a2 + a3) >> 1;
83 const tran_low_t b3 = (a2 - a3) >> 1;
85 /* Store a 16 bit value. */
// Reference 32x32 Hadamard transform assembled from four 16x16 reference
// transforms, one per quadrant of the source block.
//   a        - top-left sample of the 32x32 int16_t source block.
//   a_stride - distance, in elements, between consecutive source rows.
//   b        - destination; the quadrant results start at b + 0, b + 256,
//              b + 512 and b + 768.
// NOTE(review): the stores back into b and the pointer advance for the loop
// are not visible in this excerpt -- confirm against the full file.
95 void reference_hadamard32x32(const int16_t *a, int a_stride, tran_low_t *b) {
// Quadrant order: top-left, top-right, bottom-left, bottom-right.
96 reference_hadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0);
97 reference_hadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256);
98 reference_hadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512);
99 reference_hadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768);
// Combine one coefficient from each of the four 16x16 results per iteration.
101 for (int i = 0; i < 256; ++i) {
102 const tran_low_t a0 = b[0];
103 const tran_low_t a1 = b[256];
104 const tran_low_t a2 = b[512];
105 const tran_low_t a3 = b[768];
// Pairwise sums and differences, each shifted right by two bits (the 16x16
// reference shifts by one at the equivalent step).
107 const tran_low_t b0 = (a0 + a1) >> 2;
108 const tran_low_t b1 = (a0 - a1) >> 2;
109 const tran_low_t b2 = (a2 + a3) >> 2;
110 const tran_low_t b3 = (a2 - a3) >> 2;
// Test parameter: pairs a Hadamard implementation with the block dimension it
// handles (8, 16 or 32 in the instantiations in this file).
121 struct HadamardFuncWithSize {
// Stores `f` in `func` and `s` in `block_size`.
122 HadamardFuncWithSize(HadamardFunc f, int s) : func(f), block_size(s) {}
// Streams a readable description of a test parameter. Only the block size is
// written; the function pointer is not.
// NOTE(review): presumably picked up by googletest when it prints parameter
// values -- confirm.
127 std::ostream &operator<<(std::ostream &os, const HadamardFuncWithSize &hfs) {
128 return os << "block size: " << hfs.block_size;
// Value-parameterized fixture shared by the Hadamard tests in this file. The
// parameter supplies the function under test and its block dimension;
// subclasses supply Rand() to choose the input samples.
// NOTE(review): access specifiers, several closing braces and the declarations
// of bwh_, block_size_ and rnd_ are not visible in this excerpt.
131 class HadamardTestBase : public ::testing::TestWithParam<HadamardFuncWithSize> {
// Caches the function under test and the block dimension from the test
// parameter, then reseeds the random generator with the deterministic seed.
133 virtual void SetUp() {
134 h_func_ = GetParam().func;
135 bwh_ = GetParam().block_size;
// Total number of samples in one square block.
136 block_size_ = bwh_ * bwh_;
137 rnd_.Reset(ACMRandom::DeterministicSeed());
// Returns the next input sample; implemented by each subclass.
140 virtual int16_t Rand() = 0;
// Runs one of the reference transforms on `a` and writes the result to `b`.
// NOTE(review): the fourth parameter and the conditions choosing between the
// 32x32, 16x16 and 8x8 references are not visible here; callers pass bwh_ as
// the last argument, so presumably it selects by block dimension -- confirm.
142 void ReferenceHadamard(const int16_t *a, int a_stride, tran_low_t *b,
145 reference_hadamard32x32(a, a_stride, b);
147 reference_hadamard16x16(a, a_stride, b);
149 reference_hadamard8x8(a, a_stride, b);
// Fills one block with samples from Rand(), runs the reference and the
// function under test with the stride equal to the block dimension, and
// expects both to produce the same set of output values.
152 void CompareReferenceRandom() {
// Buffers are sized for the largest block handled here (32x32) and zeroed.
153 const int kMaxBlockSize = 32 * 32;
154 DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]);
155 DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
156 memset(a, 0, sizeof(a));
157 memset(b, 0, sizeof(b));
159 tran_low_t b_ref[kMaxBlockSize];
160 memset(b_ref, 0, sizeof(b_ref));
162 for (int i = 0; i < block_size_; ++i) a[i] = Rand();
164 ReferenceHadamard(a, bwh_, b_ref, bwh_);
165 ASM_REGISTER_STATE_CHECK(h_func_(a, bwh_, b));
167 // The order of the output is not important. Sort before checking.
168 std::sort(b, b + block_size_);
169 std::sort(b_ref, b_ref + block_size_);
// The comparison covers the whole buffers, not just block_size_ entries; the
// tail of each is expected to still hold the zeros written by memset above.
170 EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
// NOTE(review): the signature line of the next method is not visible in this
// excerpt; the tests in this file call it as VaryStride().
// It compares the function under test with the reference for source strides
// 8, 16, ..., 56, reading from an input buffer eight times the maximum block
// size.
174 const int kMaxBlockSize = 32 * 32;
175 DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]);
176 DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
177 memset(a, 0, sizeof(a));
178 for (int i = 0; i < block_size_ * 8; ++i) a[i] = Rand();
180 tran_low_t b_ref[kMaxBlockSize];
// `i` is the source stride for this iteration; both outputs are cleared first
// so results from the previous stride cannot leak into the comparison.
181 for (int i = 8; i < 64; i += 8) {
182 memset(b, 0, sizeof(b));
183 memset(b_ref, 0, sizeof(b_ref));
185 ReferenceHadamard(a, i, b_ref, bwh_);
186 ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
188 // The order of the output is not important. Sort before checking.
189 std::sort(b, b + block_size_);
190 std::sort(b_ref, b_ref + block_size_);
191 EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
// Calls the function under test `times` times on a fixed input and prints the
// elapsed time reported by the vpx_usec_timer.
195 void SpeedTest(int times) {
196 const int kMaxBlockSize = 32 * 32;
197 DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]);
198 DECLARE_ALIGNED(16, tran_low_t, output[kMaxBlockSize]);
// memset writes the byte value 1, so every int16_t sample becomes 0x0101.
199 memset(input, 1, sizeof(input));
200 memset(output, 0, sizeof(output));
202 vpx_usec_timer timer;
203 vpx_usec_timer_start(&timer);
204 for (int i = 0; i < times; ++i) {
205 h_func_(input, bwh_, output);
207 vpx_usec_timer_mark(&timer);
209 const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
210 printf("Hadamard%dx%d[%12d runs]: %d us\n", bwh_, bwh_, times,
// Function under test, taken from the test parameter in SetUp().
217 HadamardFunc h_func_;
// Low bit-depth variant of the fixture: input samples come from
// ACMRandom::Rand9Signed() (presumably signed 9-bit values, going by the
// name -- confirm).
221 class HadamardLowbdTest : public HadamardTestBase {
223 virtual int16_t Rand() { return rnd_.Rand9Signed(); }
// Each test below forwards to the shared implementation in HadamardTestBase.
226 TEST_P(HadamardLowbdTest, CompareReferenceRandom) { CompareReferenceRandom(); }
228 TEST_P(HadamardLowbdTest, VaryStride) { VaryStride(); }
// The DISABLED_ prefix keeps googletest from running this test by default.
// NOTE(review): its body is not visible in this excerpt.
230 TEST_P(HadamardLowbdTest, DISABLED_Speed) {
// Instantiations of HadamardLowbdTest, one suite per implementation family.
// Each entry pairs a function with the block dimension it handles.
// NOTE(review): apart from the SSSE3 and MSA cases, the preprocessor guards
// around these suites are not visible in this excerpt.
// Plain C versions: 8x8, 16x16 and 32x32.
236 INSTANTIATE_TEST_SUITE_P(
237 C, HadamardLowbdTest,
238 ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_c, 8),
239 HadamardFuncWithSize(&vpx_hadamard_16x16_c, 16),
240 HadamardFuncWithSize(&vpx_hadamard_32x32_c, 32)));
// SSE2: 8x8, 16x16 and 32x32.
243 INSTANTIATE_TEST_SUITE_P(
244 SSE2, HadamardLowbdTest,
245 ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_sse2, 8),
246 HadamardFuncWithSize(&vpx_hadamard_16x16_sse2, 16),
247 HadamardFuncWithSize(&vpx_hadamard_32x32_sse2, 32)));
// AVX2: 16x16 and 32x32 only; no 8x8 entry is listed.
251 INSTANTIATE_TEST_SUITE_P(
252 AVX2, HadamardLowbdTest,
253 ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_16x16_avx2, 16),
254 HadamardFuncWithSize(&vpx_hadamard_32x32_avx2, 32)));
// SSSE3: 8x8 only, and only when building for x86-64.
257 #if HAVE_SSSE3 && VPX_ARCH_X86_64
258 INSTANTIATE_TEST_SUITE_P(
259 SSSE3, HadamardLowbdTest,
260 ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_ssse3, 8)));
261 #endif // HAVE_SSSE3 && VPX_ARCH_X86_64
// NEON: 8x8 and 16x16.
264 INSTANTIATE_TEST_SUITE_P(
265 NEON, HadamardLowbdTest,
266 ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_neon, 8),
267 HadamardFuncWithSize(&vpx_hadamard_16x16_neon, 16)));
270 // TODO(jingning): Remove highbitdepth flag when the SIMD functions are
271 // in place and turn on the unit test.
// MSA: 8x8 and 16x16, compiled only when high bit-depth support is off.
272 #if !CONFIG_VP9_HIGHBITDEPTH
274 INSTANTIATE_TEST_SUITE_P(
275 MSA, HadamardLowbdTest,
276 ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_msa, 8),
277 HadamardFuncWithSize(&vpx_hadamard_16x16_msa, 16)));
279 #endif // !CONFIG_VP9_HIGHBITDEPTH
// VSX: 8x8 and 16x16.
282 INSTANTIATE_TEST_SUITE_P(
283 VSX, HadamardLowbdTest,
284 ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_vsx, 8),
285 HadamardFuncWithSize(&vpx_hadamard_16x16_vsx, 16)));
// LSX: 8x8 and 16x16.
289 INSTANTIATE_TEST_SUITE_P(
290 LSX, HadamardLowbdTest,
291 ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_lsx, 8),
292 HadamardFuncWithSize(&vpx_hadamard_16x16_lsx, 16)));
// High bit-depth tests; compiled only when CONFIG_VP9_HIGHBITDEPTH is set.
295 #if CONFIG_VP9_HIGHBITDEPTH
// Same fixture as the low bit-depth tests, but input samples come from
// ACMRandom::Rand13Signed() (presumably signed 13-bit values, going by the
// name -- confirm).
296 class HadamardHighbdTest : public HadamardTestBase {
298 virtual int16_t Rand() { return rnd_.Rand13Signed(); }
// Each test below forwards to the shared implementation in HadamardTestBase.
301 TEST_P(HadamardHighbdTest, CompareReferenceRandom) { CompareReferenceRandom(); }
303 TEST_P(HadamardHighbdTest, VaryStride) { VaryStride(); }
// The DISABLED_ prefix keeps googletest from running this test by default.
// NOTE(review): its body is not visible in this excerpt.
305 TEST_P(HadamardHighbdTest, DISABLED_Speed) {
// Plain C high bit-depth versions: 8x8, 16x16 and 32x32.
311 INSTANTIATE_TEST_SUITE_P(
312 C, HadamardHighbdTest,
313 ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_c, 8),
314 HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_c, 16),
315 HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_c, 32)));
// AVX2 high bit-depth versions: 8x8, 16x16 and 32x32.
// NOTE(review): the guard around this suite and the end of its argument list
// are not visible in this excerpt.
318 INSTANTIATE_TEST_SUITE_P(
319 AVX2, HadamardHighbdTest,
320 ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_avx2, 8),
321 HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_avx2, 16),
322 HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_avx2,
326 #endif // CONFIG_VP9_HIGHBITDEPTH