/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <cassert>
#include <cstring>

#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_filter.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
// Largest block dimension (width or height) exercised by these tests.
static const unsigned int kMaxDimension = 64;

// Common signature of every vp9 convolution entry point under test:
// reads the (src, src_stride) image, writes a w x h block to
// (dst, dst_stride); filter_x/filter_y point into 16-entry subpel kernel
// tables stepped through by filter_x_stride/filter_y_stride.
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int filter_x_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h);
33 struct ConvolveFunctions {
34 ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg,
35 ConvolveFunc v8, ConvolveFunc v8_avg,
36 ConvolveFunc hv8, ConvolveFunc hv8_avg,
38 : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg),
39 hv8_avg_(hv8_avg), use_high_bd_(bd) {}
46 ConvolveFunc hv8_avg_;
47 int use_high_bd_; // 0 if high bitdepth not used, else the actual bit depth.
50 typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
// Clamps a filtered value back into the 8-bit pixel range [0, 255].
uint8_t clip_pixel(int x) {
  return (x < 0) ? 0 :
         (x > 255) ? 255 : x;
}
61 void filter_block2d_8_c(const uint8_t *src_ptr,
62 const unsigned int src_stride,
63 const int16_t *HFilter,
64 const int16_t *VFilter,
66 unsigned int dst_stride,
67 unsigned int output_width,
68 unsigned int output_height) {
69 // Between passes, we use an intermediate buffer whose height is extended to
70 // have enough horizontally filtered values as input for the vertical pass.
71 // This buffer is allocated to be big enough for the largest block type we
73 const int kInterp_Extend = 4;
74 const unsigned int intermediate_height =
75 (kInterp_Extend - 1) + output_height + kInterp_Extend;
78 // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
79 // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
83 // and filter_max_width = 16
85 uint8_t intermediate_buffer[71 * kMaxDimension];
86 const int intermediate_next_stride = 1 - intermediate_height * output_width;
88 // Horizontal pass (src -> transposed intermediate).
89 uint8_t *output_ptr = intermediate_buffer;
90 const int src_next_row_stride = src_stride - output_width;
91 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
92 for (i = 0; i < intermediate_height; ++i) {
93 for (j = 0; j < output_width; ++j) {
95 const int temp = (src_ptr[0] * HFilter[0]) +
96 (src_ptr[1] * HFilter[1]) +
97 (src_ptr[2] * HFilter[2]) +
98 (src_ptr[3] * HFilter[3]) +
99 (src_ptr[4] * HFilter[4]) +
100 (src_ptr[5] * HFilter[5]) +
101 (src_ptr[6] * HFilter[6]) +
102 (src_ptr[7] * HFilter[7]) +
103 (VP9_FILTER_WEIGHT >> 1); // Rounding
105 // Normalize back to 0-255...
106 *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
108 output_ptr += intermediate_height;
110 src_ptr += src_next_row_stride;
111 output_ptr += intermediate_next_stride;
114 // Vertical pass (transposed intermediate -> dst).
115 src_ptr = intermediate_buffer;
116 const int dst_next_row_stride = dst_stride - output_width;
117 for (i = 0; i < output_height; ++i) {
118 for (j = 0; j < output_width; ++j) {
120 const int temp = (src_ptr[0] * VFilter[0]) +
121 (src_ptr[1] * VFilter[1]) +
122 (src_ptr[2] * VFilter[2]) +
123 (src_ptr[3] * VFilter[3]) +
124 (src_ptr[4] * VFilter[4]) +
125 (src_ptr[5] * VFilter[5]) +
126 (src_ptr[6] * VFilter[6]) +
127 (src_ptr[7] * VFilter[7]) +
128 (VP9_FILTER_WEIGHT >> 1); // Rounding
130 // Normalize back to 0-255...
131 *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
132 src_ptr += intermediate_height;
134 src_ptr += intermediate_next_stride;
135 dst_ptr += dst_next_row_stride;
// Rounds-to-nearest average of an output_width x output_height block in
// src into output_ptr (in place on the destination), modelling the *_avg_
// convolution variants.
void block2d_average_c(uint8_t *src,
                       unsigned int src_stride,
                       uint8_t *output_ptr,
                       unsigned int output_stride,
                       unsigned int output_width,
                       unsigned int output_height) {
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}
154 void filter_average_block2d_8_c(const uint8_t *src_ptr,
155 const unsigned int src_stride,
156 const int16_t *HFilter,
157 const int16_t *VFilter,
159 unsigned int dst_stride,
160 unsigned int output_width,
161 unsigned int output_height) {
162 uint8_t tmp[kMaxDimension * kMaxDimension];
164 assert(output_width <= kMaxDimension);
165 assert(output_height <= kMaxDimension);
166 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
167 output_width, output_height);
168 block2d_average_c(tmp, 64, dst_ptr, dst_stride,
169 output_width, output_height);
172 #if CONFIG_VP9_HIGHBITDEPTH
173 void high_filter_block2d_8_c(const uint16_t *src_ptr,
174 const unsigned int src_stride,
175 const int16_t *HFilter,
176 const int16_t *VFilter,
178 unsigned int dst_stride,
179 unsigned int output_width,
180 unsigned int output_height,
182 // Between passes, we use an intermediate buffer whose height is extended to
183 // have enough horizontally filtered values as input for the vertical pass.
184 // This buffer is allocated to be big enough for the largest block type we
186 const int kInterp_Extend = 4;
187 const unsigned int intermediate_height =
188 (kInterp_Extend - 1) + output_height + kInterp_Extend;
190 /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
191 * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
195 * and filter_max_width = 16
197 uint16_t intermediate_buffer[71 * kMaxDimension];
198 const int intermediate_next_stride = 1 - intermediate_height * output_width;
200 // Horizontal pass (src -> transposed intermediate).
202 uint16_t *output_ptr = intermediate_buffer;
203 const int src_next_row_stride = src_stride - output_width;
205 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
206 for (i = 0; i < intermediate_height; ++i) {
207 for (j = 0; j < output_width; ++j) {
209 const int temp = (src_ptr[0] * HFilter[0]) +
210 (src_ptr[1] * HFilter[1]) +
211 (src_ptr[2] * HFilter[2]) +
212 (src_ptr[3] * HFilter[3]) +
213 (src_ptr[4] * HFilter[4]) +
214 (src_ptr[5] * HFilter[5]) +
215 (src_ptr[6] * HFilter[6]) +
216 (src_ptr[7] * HFilter[7]) +
217 (VP9_FILTER_WEIGHT >> 1); // Rounding
219 // Normalize back to 0-255...
220 *output_ptr = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
222 output_ptr += intermediate_height;
224 src_ptr += src_next_row_stride;
225 output_ptr += intermediate_next_stride;
229 // Vertical pass (transposed intermediate -> dst).
231 uint16_t *src_ptr = intermediate_buffer;
232 const int dst_next_row_stride = dst_stride - output_width;
234 for (i = 0; i < output_height; ++i) {
235 for (j = 0; j < output_width; ++j) {
237 const int temp = (src_ptr[0] * VFilter[0]) +
238 (src_ptr[1] * VFilter[1]) +
239 (src_ptr[2] * VFilter[2]) +
240 (src_ptr[3] * VFilter[3]) +
241 (src_ptr[4] * VFilter[4]) +
242 (src_ptr[5] * VFilter[5]) +
243 (src_ptr[6] * VFilter[6]) +
244 (src_ptr[7] * VFilter[7]) +
245 (VP9_FILTER_WEIGHT >> 1); // Rounding
247 // Normalize back to 0-255...
248 *dst_ptr++ = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
249 src_ptr += intermediate_height;
251 src_ptr += intermediate_next_stride;
252 dst_ptr += dst_next_row_stride;
// Rounds-to-nearest average for 16-bit pixels; bd is carried for signature
// symmetry with the other high-bitdepth helpers (values are already in range).
void high_block2d_average_c(uint16_t *src,
                            unsigned int src_stride,
                            uint16_t *output_ptr,
                            unsigned int output_stride,
                            unsigned int output_width,
                            unsigned int output_height,
                            int bd) {
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}
273 void high_filter_average_block2d_8_c(const uint16_t *src_ptr,
274 const unsigned int src_stride,
275 const int16_t *HFilter,
276 const int16_t *VFilter,
278 unsigned int dst_stride,
279 unsigned int output_width,
280 unsigned int output_height,
282 uint16_t tmp[kMaxDimension * kMaxDimension];
284 assert(output_width <= kMaxDimension);
285 assert(output_height <= kMaxDimension);
286 high_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
287 output_width, output_height, bd);
288 high_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
289 output_width, output_height, bd);
291 #endif // CONFIG_VP9_HIGHBITDEPTH
293 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
295 static void SetUpTestCase() {
296 // Force input_ to be unaligned, output to be 16 byte aligned.
297 input_ = reinterpret_cast<uint8_t*>(
298 vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
299 output_ = reinterpret_cast<uint8_t*>(
300 vpx_memalign(kDataAlignment, kOutputBufferSize));
301 #if CONFIG_VP9_HIGHBITDEPTH
302 input16_ = reinterpret_cast<uint16_t*>(
303 vpx_memalign(kDataAlignment,
304 (kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
305 output16_ = reinterpret_cast<uint16_t*>(
306 vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
310 static void TearDownTestCase() {
311 vpx_free(input_ - 1);
315 #if CONFIG_VP9_HIGHBITDEPTH
316 vpx_free(input16_ - 1);
324 static const int kDataAlignment = 16;
325 static const int kOuterBlockSize = 256;
326 static const int kInputStride = kOuterBlockSize;
327 static const int kOutputStride = kOuterBlockSize;
328 static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
329 static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
331 int Width() const { return GET_PARAM(0); }
332 int Height() const { return GET_PARAM(1); }
333 int BorderLeft() const {
334 const int center = (kOuterBlockSize - Width()) / 2;
335 return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
337 int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
339 bool IsIndexInBorder(int i) {
340 return (i < BorderTop() * kOuterBlockSize ||
341 i >= (BorderTop() + Height()) * kOuterBlockSize ||
342 i % kOuterBlockSize < BorderLeft() ||
343 i % kOuterBlockSize >= (BorderLeft() + Width()));
346 virtual void SetUp() {
348 #if CONFIG_VP9_HIGHBITDEPTH
349 if (UUT_->use_high_bd_ != 0)
350 mask_ = (1 << UUT_->use_high_bd_) - 1;
354 /* Set up guard blocks for an inner block centered in the outer block */
355 for (int i = 0; i < kOutputBufferSize; ++i) {
356 if (IsIndexInBorder(i))
362 ::libvpx_test::ACMRandom prng;
363 for (int i = 0; i < kInputBufferSize; ++i) {
366 #if CONFIG_VP9_HIGHBITDEPTH
370 input_[i] = prng.Rand8Extremes();
371 #if CONFIG_VP9_HIGHBITDEPTH
372 input16_[i] = prng.Rand16() & mask_;
378 void SetConstantInput(int value) {
379 memset(input_, value, kInputBufferSize);
380 #if CONFIG_VP9_HIGHBITDEPTH
381 vpx_memset16(input16_, value, kInputBufferSize);
385 void CheckGuardBlocks() {
386 for (int i = 0; i < kOutputBufferSize; ++i) {
387 if (IsIndexInBorder(i))
388 EXPECT_EQ(255, output_[i]);
392 uint8_t *input() const {
393 #if CONFIG_VP9_HIGHBITDEPTH
394 if (UUT_->use_high_bd_ == 0) {
395 return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
397 return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize +
401 return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
405 uint8_t *output() const {
406 #if CONFIG_VP9_HIGHBITDEPTH
407 if (UUT_->use_high_bd_ == 0) {
408 return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
410 return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize +
414 return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
418 uint16_t lookup(uint8_t *list, int index) const {
419 #if CONFIG_VP9_HIGHBITDEPTH
420 if (UUT_->use_high_bd_ == 0) {
423 return CONVERT_TO_SHORTPTR(list)[index];
430 void assign_val(uint8_t *list, int index, uint16_t val) const {
431 #if CONFIG_VP9_HIGHBITDEPTH
432 if (UUT_->use_high_bd_ == 0) {
433 list[index] = (uint8_t) val;
435 CONVERT_TO_SHORTPTR(list)[index] = val;
438 list[index] = (uint8_t) val;
442 void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr,
443 const unsigned int src_stride,
444 const int16_t *HFilter,
445 const int16_t *VFilter,
447 unsigned int dst_stride,
448 unsigned int output_width,
449 unsigned int output_height) {
450 #if CONFIG_VP9_HIGHBITDEPTH
451 if (UUT_->use_high_bd_ == 0) {
452 filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
453 dst_ptr, dst_stride, output_width,
456 high_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
458 CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
459 output_width, output_height,
463 filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
464 dst_ptr, dst_stride, output_width,
469 void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
470 const unsigned int src_stride,
471 const int16_t *HFilter,
472 const int16_t *VFilter,
474 unsigned int dst_stride,
475 unsigned int output_width,
476 unsigned int output_height) {
477 #if CONFIG_VP9_HIGHBITDEPTH
478 if (UUT_->use_high_bd_ == 0) {
479 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
480 dst_ptr, dst_stride, output_width, output_height);
482 high_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
484 CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
485 output_width, output_height, UUT_->use_high_bd_);
488 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
489 dst_ptr, dst_stride, output_width, output_height);
493 const ConvolveFunctions* UUT_;
494 static uint8_t* input_;
495 static uint8_t* output_;
496 #if CONFIG_VP9_HIGHBITDEPTH
497 static uint16_t* input16_;
498 static uint16_t* output16_;
503 uint8_t* ConvolveTest::input_ = NULL;
504 uint8_t* ConvolveTest::output_ = NULL;
505 #if CONFIG_VP9_HIGHBITDEPTH
506 uint16_t* ConvolveTest::input16_ = NULL;
507 uint16_t* ConvolveTest::output16_ = NULL;
510 TEST_P(ConvolveTest, GuardBlocks) {
514 TEST_P(ConvolveTest, CopyHoriz) {
515 uint8_t* const in = input();
516 uint8_t* const out = output();
517 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
519 ASM_REGISTER_STATE_CHECK(
520 UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
525 for (int y = 0; y < Height(); ++y)
526 for (int x = 0; x < Width(); ++x)
527 ASSERT_EQ(lookup(out, y * kOutputStride + x),
528 lookup(in, y * kInputStride + x))
529 << "(" << x << "," << y << ")";
532 TEST_P(ConvolveTest, CopyVert) {
533 uint8_t* const in = input();
534 uint8_t* const out = output();
535 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
537 ASM_REGISTER_STATE_CHECK(
538 UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
543 for (int y = 0; y < Height(); ++y)
544 for (int x = 0; x < Width(); ++x)
545 ASSERT_EQ(lookup(out, y * kOutputStride + x),
546 lookup(in, y * kInputStride + x))
547 << "(" << x << "," << y << ")";
550 TEST_P(ConvolveTest, Copy2D) {
551 uint8_t* const in = input();
552 uint8_t* const out = output();
553 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
555 ASM_REGISTER_STATE_CHECK(
556 UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
561 for (int y = 0; y < Height(); ++y)
562 for (int x = 0; x < Width(); ++x)
563 ASSERT_EQ(lookup(out, y * kOutputStride + x),
564 lookup(in, y * kInputStride + x))
565 << "(" << x << "," << y << ")";
568 const int kNumFilterBanks = 4;
569 const int kNumFilters = 16;
571 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
572 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
573 const InterpKernel *filters =
574 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
575 for (int i = 0; i < kNumFilters; i++) {
576 const int p0 = filters[i][0] + filters[i][1];
577 const int p1 = filters[i][2] + filters[i][3];
578 const int p2 = filters[i][4] + filters[i][5];
579 const int p3 = filters[i][6] + filters[i][7];
584 EXPECT_LE(p0 + p3, 128);
585 EXPECT_LE(p0 + p3 + p1, 128);
586 EXPECT_LE(p0 + p3 + p1 + p2, 128);
587 EXPECT_EQ(p0 + p1 + p2 + p3, 128);
592 const int16_t kInvalidFilter[8] = { 0 };
594 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
595 uint8_t* const in = input();
596 uint8_t* const out = output();
597 #if CONFIG_VP9_HIGHBITDEPTH
598 uint8_t ref8[kOutputStride * kMaxDimension];
599 uint16_t ref16[kOutputStride * kMaxDimension];
601 if (UUT_->use_high_bd_ == 0) {
604 ref = CONVERT_TO_BYTEPTR(ref16);
607 uint8_t ref[kOutputStride * kMaxDimension];
610 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
611 const InterpKernel *filters =
612 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
613 const InterpKernel *const eighttap_smooth =
614 vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
616 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
617 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
618 wrapper_filter_block2d_8_c(in, kInputStride,
619 filters[filter_x], filters[filter_y],
623 if (filters == eighttap_smooth || (filter_x && filter_y))
624 ASM_REGISTER_STATE_CHECK(
625 UUT_->hv8_(in, kInputStride, out, kOutputStride,
626 filters[filter_x], 16, filters[filter_y], 16,
629 ASM_REGISTER_STATE_CHECK(
630 UUT_->v8_(in, kInputStride, out, kOutputStride,
631 kInvalidFilter, 16, filters[filter_y], 16,
634 ASM_REGISTER_STATE_CHECK(
635 UUT_->h8_(in, kInputStride, out, kOutputStride,
636 filters[filter_x], 16, kInvalidFilter, 16,
641 for (int y = 0; y < Height(); ++y)
642 for (int x = 0; x < Width(); ++x)
643 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
644 lookup(out, y * kOutputStride + x))
645 << "mismatch at (" << x << "," << y << "), "
646 << "filters (" << filter_bank << ","
647 << filter_x << "," << filter_y << ")";
653 TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
654 uint8_t* const in = input();
655 uint8_t* const out = output();
656 #if CONFIG_VP9_HIGHBITDEPTH
657 uint8_t ref8[kOutputStride * kMaxDimension];
658 uint16_t ref16[kOutputStride * kMaxDimension];
660 if (UUT_->use_high_bd_ == 0) {
663 ref = CONVERT_TO_BYTEPTR(ref16);
666 uint8_t ref[kOutputStride * kMaxDimension];
669 // Populate ref and out with some random data
670 ::libvpx_test::ACMRandom prng;
671 for (int y = 0; y < Height(); ++y) {
672 for (int x = 0; x < Width(); ++x) {
674 #if CONFIG_VP9_HIGHBITDEPTH
675 if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) {
676 r = prng.Rand8Extremes();
678 r = prng.Rand16() & mask_;
681 r = prng.Rand8Extremes();
684 assign_val(out, y * kOutputStride + x, r);
685 assign_val(ref, y * kOutputStride + x, r);
689 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
690 const InterpKernel *filters =
691 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
692 const InterpKernel *const eighttap_smooth =
693 vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
695 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
696 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
697 wrapper_filter_average_block2d_8_c(in, kInputStride,
698 filters[filter_x], filters[filter_y],
702 if (filters == eighttap_smooth || (filter_x && filter_y))
703 ASM_REGISTER_STATE_CHECK(
704 UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
705 filters[filter_x], 16, filters[filter_y], 16,
708 ASM_REGISTER_STATE_CHECK(
709 UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
710 filters[filter_x], 16, filters[filter_y], 16,
713 ASM_REGISTER_STATE_CHECK(
714 UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
715 filters[filter_x], 16, filters[filter_y], 16,
720 for (int y = 0; y < Height(); ++y)
721 for (int x = 0; x < Width(); ++x)
722 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
723 lookup(out, y * kOutputStride + x))
724 << "mismatch at (" << x << "," << y << "), "
725 << "filters (" << filter_bank << ","
726 << filter_x << "," << filter_y << ")";
732 TEST_P(ConvolveTest, FilterExtremes) {
733 uint8_t *const in = input();
734 uint8_t *const out = output();
735 #if CONFIG_VP9_HIGHBITDEPTH
736 uint8_t ref8[kOutputStride * kMaxDimension];
737 uint16_t ref16[kOutputStride * kMaxDimension];
739 if (UUT_->use_high_bd_ == 0) {
742 ref = CONVERT_TO_BYTEPTR(ref16);
745 uint8_t ref[kOutputStride * kMaxDimension];
748 // Populate ref and out with some random data
749 ::libvpx_test::ACMRandom prng;
750 for (int y = 0; y < Height(); ++y) {
751 for (int x = 0; x < Width(); ++x) {
753 #if CONFIG_VP9_HIGHBITDEPTH
754 if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) {
755 r = prng.Rand8Extremes();
757 r = prng.Rand16() & mask_;
760 r = prng.Rand8Extremes();
762 assign_val(out, y * kOutputStride + x, r);
763 assign_val(ref, y * kOutputStride + x, r);
767 for (int axis = 0; axis < 2; axis++) {
769 while (seed_val < 256) {
770 for (int y = 0; y < 8; ++y) {
771 for (int x = 0; x < 8; ++x) {
772 #if CONFIG_VP9_HIGHBITDEPTH
773 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
774 ((seed_val >> (axis ? y : x)) & 1) * mask_);
776 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
777 ((seed_val >> (axis ? y : x)) & 1) * 255);
779 if (axis) seed_val++;
786 if (axis) seed_val += 8;
788 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
789 const InterpKernel *filters =
790 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
791 const InterpKernel *const eighttap_smooth =
792 vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
793 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
794 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
795 wrapper_filter_block2d_8_c(in, kInputStride,
796 filters[filter_x], filters[filter_y],
799 if (filters == eighttap_smooth || (filter_x && filter_y))
800 ASM_REGISTER_STATE_CHECK(
801 UUT_->hv8_(in, kInputStride, out, kOutputStride,
802 filters[filter_x], 16, filters[filter_y], 16,
805 ASM_REGISTER_STATE_CHECK(
806 UUT_->v8_(in, kInputStride, out, kOutputStride,
807 kInvalidFilter, 16, filters[filter_y], 16,
810 ASM_REGISTER_STATE_CHECK(
811 UUT_->h8_(in, kInputStride, out, kOutputStride,
812 filters[filter_x], 16, kInvalidFilter, 16,
815 for (int y = 0; y < Height(); ++y)
816 for (int x = 0; x < Width(); ++x)
817 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
818 lookup(out, y * kOutputStride + x))
819 << "mismatch at (" << x << "," << y << "), "
820 << "filters (" << filter_bank << ","
821 << filter_x << "," << filter_y << ")";
829 DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
830 { 0, 0, 0, 0, 0, 0, 0, 128},
831 { 0, 0, 0, 0, 0, 0, 128},
832 { 0, 0, 0, 0, 0, 128},
838 { 0, 0, 0, 0, 0, 0, 0, 128},
839 { 0, 0, 0, 0, 0, 0, 128},
840 { 0, 0, 0, 0, 0, 128},
848 /* This test exercises the horizontal and vertical filter functions. */
849 TEST_P(ConvolveTest, ChangeFilterWorks) {
850 uint8_t* const in = input();
851 uint8_t* const out = output();
853 /* Assume that the first input sample is at the 8/16th position. */
854 const int kInitialSubPelOffset = 8;
856 /* Filters are 8-tap, so the first filter tap will be applied to the pixel
857 * at position -3 with respect to the current filtering position. Since
858 * kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
859 * which is non-zero only in the last tap. So, applying the filter at the
860 * current input position will result in an output equal to the pixel at
861 * offset +4 (-3 + 7) with respect to the current filtering position.
863 const int kPixelSelected = 4;
865 /* Assume that each output pixel requires us to step on by 17/16th pixels in
868 const int kInputPixelStep = 17;
870 /* The filters are setup in such a way that the expected output produces
871 * sets of 8 identical output samples. As the filter position moves to the
872 * next 1/16th pixel position the only active (=128) filter tap moves one
873 * position to the left, resulting in the same input pixel being replicated
874 * in to the output for 8 consecutive samples. After each set of 8 positions
875 * the filters select a different input pixel. kFilterPeriodAdjust below
876 * computes which input pixel is written to the output for a specified
880 /* Test the horizontal filter. */
881 ASM_REGISTER_STATE_CHECK(
882 UUT_->h8_(in, kInputStride, out, kOutputStride,
883 kChangeFilters[kInitialSubPelOffset],
884 kInputPixelStep, NULL, 0, Width(), Height()));
886 for (int x = 0; x < Width(); ++x) {
887 const int kFilterPeriodAdjust = (x >> 3) << 3;
889 kPixelSelected + ((kInitialSubPelOffset
890 + kFilterPeriodAdjust * kInputPixelStep)
892 ASSERT_EQ(lookup(in, ref_x), lookup(out, x))
893 << "x == " << x << "width = " << Width();
896 /* Test the vertical filter. */
897 ASM_REGISTER_STATE_CHECK(
898 UUT_->v8_(in, kInputStride, out, kOutputStride,
899 NULL, 0, kChangeFilters[kInitialSubPelOffset],
900 kInputPixelStep, Width(), Height()));
902 for (int y = 0; y < Height(); ++y) {
903 const int kFilterPeriodAdjust = (y >> 3) << 3;
905 kPixelSelected + ((kInitialSubPelOffset
906 + kFilterPeriodAdjust * kInputPixelStep)
908 ASSERT_EQ(lookup(in, ref_y * kInputStride), lookup(out, y * kInputStride))
912 /* Test the horizontal and vertical filters in combination. */
913 ASM_REGISTER_STATE_CHECK(
914 UUT_->hv8_(in, kInputStride, out, kOutputStride,
915 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
916 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
919 for (int y = 0; y < Height(); ++y) {
920 const int kFilterPeriodAdjustY = (y >> 3) << 3;
922 kPixelSelected + ((kInitialSubPelOffset
923 + kFilterPeriodAdjustY * kInputPixelStep)
925 for (int x = 0; x < Width(); ++x) {
926 const int kFilterPeriodAdjustX = (x >> 3) << 3;
928 kPixelSelected + ((kInitialSubPelOffset
929 + kFilterPeriodAdjustX * kInputPixelStep)
932 ASSERT_EQ(lookup(in, ref_y * kInputStride + ref_x),
933 lookup(out, y * kOutputStride + x))
934 << "x == " << x << ", y == " << y;
939 /* This test exercises that enough rows and columns are filtered with every
940 possible initial fractional positions and scaling steps. */
941 TEST_P(ConvolveTest, CheckScalingFiltering) {
942 uint8_t* const in = input();
943 uint8_t* const out = output();
944 const InterpKernel *const eighttap = vp9_get_interp_kernel(EIGHTTAP);
946 SetConstantInput(127);
948 for (int frac = 0; frac < 16; ++frac) {
949 for (int step = 1; step <= 32; ++step) {
950 /* Test the horizontal and vertical filters in combination. */
951 ASM_REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
952 eighttap[frac], step,
953 eighttap[frac], step,
958 for (int y = 0; y < Height(); ++y) {
959 for (int x = 0; x < Width(); ++x) {
960 ASSERT_EQ(lookup(in, y * kInputStride + x),
961 lookup(out, y * kOutputStride + x))
962 << "x == " << x << ", y == " << y
963 << ", frac == " << frac << ", step == " << step;
970 using std::tr1::make_tuple;
972 #if CONFIG_VP9_HIGHBITDEPTH
973 #if HAVE_SSE2 && ARCH_X86_64
974 void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
975 uint8_t *dst, ptrdiff_t dst_stride,
976 const int16_t *filter_x,
978 const int16_t *filter_y,
981 vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
982 filter_x_stride, filter_y, filter_y_stride,
986 void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
987 uint8_t *dst, ptrdiff_t dst_stride,
988 const int16_t *filter_x,
990 const int16_t *filter_y,
993 vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
994 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
997 void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
998 uint8_t *dst, ptrdiff_t dst_stride,
999 const int16_t *filter_x,
1000 int filter_x_stride,
1001 const int16_t *filter_y,
1002 int filter_y_stride,
1004 vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1005 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1008 void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1009 uint8_t *dst, ptrdiff_t dst_stride,
1010 const int16_t *filter_x,
1011 int filter_x_stride,
1012 const int16_t *filter_y,
1013 int filter_y_stride,
1015 vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1016 filter_x_stride, filter_y, filter_y_stride,
1020 void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1021 uint8_t *dst, ptrdiff_t dst_stride,
1022 const int16_t *filter_x,
1023 int filter_x_stride,
1024 const int16_t *filter_y,
1025 int filter_y_stride,
1027 vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
1028 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1031 void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1032 uint8_t *dst, ptrdiff_t dst_stride,
1033 const int16_t *filter_x,
1034 int filter_x_stride,
1035 const int16_t *filter_y,
1036 int filter_y_stride,
1038 vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x,
1039 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1042 void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1043 uint8_t *dst, ptrdiff_t dst_stride,
1044 const int16_t *filter_x,
1045 int filter_x_stride,
1046 const int16_t *filter_y,
1047 int filter_y_stride,
1049 vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1050 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1053 void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1054 uint8_t *dst, ptrdiff_t dst_stride,
1055 const int16_t *filter_x,
1056 int filter_x_stride,
1057 const int16_t *filter_y,
1058 int filter_y_stride,
1060 vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1061 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1064 void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1065 uint8_t *dst, ptrdiff_t dst_stride,
1066 const int16_t *filter_x,
1067 int filter_x_stride,
1068 const int16_t *filter_y,
1069 int filter_y_stride,
1071 vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1072 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1075 void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1076 uint8_t *dst, ptrdiff_t dst_stride,
1077 const int16_t *filter_x,
1078 int filter_x_stride,
1079 const int16_t *filter_y,
1080 int filter_y_stride,
1082 vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1083 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1086 void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1087 uint8_t *dst, ptrdiff_t dst_stride,
1088 const int16_t *filter_x,
1089 int filter_x_stride,
1090 const int16_t *filter_y,
1091 int filter_y_stride,
1093 vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
1094 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1097 void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1098 uint8_t *dst, ptrdiff_t dst_stride,
1099 const int16_t *filter_x,
1100 int filter_x_stride,
1101 const int16_t *filter_y,
1102 int filter_y_stride,
1104 vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x,
1105 filter_x_stride, filter_y, filter_y_stride,
1109 void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1110 uint8_t *dst, ptrdiff_t dst_stride,
1111 const int16_t *filter_x,
1112 int filter_x_stride,
1113 const int16_t *filter_y,
1114 int filter_y_stride,
1116 vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1117 filter_x_stride, filter_y, filter_y_stride,
1121 void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1122 uint8_t *dst, ptrdiff_t dst_stride,
1123 const int16_t *filter_x,
1124 int filter_x_stride,
1125 const int16_t *filter_y,
1126 int filter_y_stride,
1128 vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1129 filter_x_stride, filter_y, filter_y_stride,
1133 void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1134 uint8_t *dst, ptrdiff_t dst_stride,
1135 const int16_t *filter_x,
1136 int filter_x_stride,
1137 const int16_t *filter_y,
1138 int filter_y_stride,
1140 vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1141 filter_x_stride, filter_y, filter_y_stride,
1145 void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1146 uint8_t *dst, ptrdiff_t dst_stride,
1147 const int16_t *filter_x,
1148 int filter_x_stride,
1149 const int16_t *filter_y,
1150 int filter_y_stride,
1152 vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1153 filter_x_stride, filter_y, filter_y_stride, w, h, 12);
1156 void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1157 uint8_t *dst, ptrdiff_t dst_stride,
1158 const int16_t *filter_x,
1159 int filter_x_stride,
1160 const int16_t *filter_y,
1161 int filter_y_stride,
1163 vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
1164 filter_x_stride, filter_y, filter_y_stride, w, h, 12);
1167 void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1168 uint8_t *dst, ptrdiff_t dst_stride,
1169 const int16_t *filter_x,
1170 int filter_x_stride,
1171 const int16_t *filter_y,
1172 int filter_y_stride,
1174 vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x,
1175 filter_x_stride, filter_y, filter_y_stride, w, h, 12);
1177 #endif // HAVE_SSE2 && ARCH_X86_64
1179 void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
1180 uint8_t *dst, ptrdiff_t dst_stride,
1181 const int16_t *filter_x,
1182 int filter_x_stride,
1183 const int16_t *filter_y,
1184 int filter_y_stride,
1186 vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1187 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1190 void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
1191 uint8_t *dst, ptrdiff_t dst_stride,
1192 const int16_t *filter_x,
1193 int filter_x_stride,
1194 const int16_t *filter_y,
1195 int filter_y_stride,
1197 vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1198 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1201 void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
1202 uint8_t *dst, ptrdiff_t dst_stride,
1203 const int16_t *filter_x,
1204 int filter_x_stride,
1205 const int16_t *filter_y,
1206 int filter_y_stride,
1208 vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
1209 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1212 void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
1213 uint8_t *dst, ptrdiff_t dst_stride,
1214 const int16_t *filter_x,
1215 int filter_x_stride,
1216 const int16_t *filter_y,
1217 int filter_y_stride,
1219 vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
1220 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1223 void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride,
1224 uint8_t *dst, ptrdiff_t dst_stride,
1225 const int16_t *filter_x,
1226 int filter_x_stride,
1227 const int16_t *filter_y,
1228 int filter_y_stride,
1230 vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x,
1231 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1234 void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
1235 uint8_t *dst, ptrdiff_t dst_stride,
1236 const int16_t *filter_x,
1237 int filter_x_stride,
1238 const int16_t *filter_y,
1239 int filter_y_stride,
1241 vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
1242 filter_x_stride, filter_y, filter_y_stride,
1246 void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
1247 uint8_t *dst, ptrdiff_t dst_stride,
1248 const int16_t *filter_x,
1249 int filter_x_stride,
1250 const int16_t *filter_y,
1251 int filter_y_stride,
1253 vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1254 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1257 void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
1258 uint8_t *dst, ptrdiff_t dst_stride,
1259 const int16_t *filter_x,
1260 int filter_x_stride,
1261 const int16_t *filter_y,
1262 int filter_y_stride,
1264 vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1265 filter_x_stride, filter_y, filter_y_stride,
1269 void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
1270 uint8_t *dst, ptrdiff_t dst_stride,
1271 const int16_t *filter_x,
1272 int filter_x_stride,
1273 const int16_t *filter_y,
1274 int filter_y_stride,
1276 vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
1277 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1280 void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
1281 uint8_t *dst, ptrdiff_t dst_stride,
1282 const int16_t *filter_x,
1283 int filter_x_stride,
1284 const int16_t *filter_y,
1285 int filter_y_stride,
1287 vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
1288 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1291 void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride,
1292 uint8_t *dst, ptrdiff_t dst_stride,
1293 const int16_t *filter_x,
1294 int filter_x_stride,
1295 const int16_t *filter_y,
1296 int filter_y_stride,
1298 vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x,
1299 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1302 void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
1303 uint8_t *dst, ptrdiff_t dst_stride,
1304 const int16_t *filter_x,
1305 int filter_x_stride,
1306 const int16_t *filter_y,
1307 int filter_y_stride,
1309 vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
1310 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1313 void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
1314 uint8_t *dst, ptrdiff_t dst_stride,
1315 const int16_t *filter_x,
1316 int filter_x_stride,
1317 const int16_t *filter_y,
1318 int filter_y_stride,
1320 vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1321 filter_x_stride, filter_y, filter_y_stride,
1325 void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
1326 uint8_t *dst, ptrdiff_t dst_stride,
1327 const int16_t *filter_x,
1328 int filter_x_stride,
1329 const int16_t *filter_y,
1330 int filter_y_stride,
1332 vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1333 filter_x_stride, filter_y, filter_y_stride,
1337 void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
1338 uint8_t *dst, ptrdiff_t dst_stride,
1339 const int16_t *filter_x,
1340 int filter_x_stride,
1341 const int16_t *filter_y,
1342 int filter_y_stride,
1344 vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
1345 filter_x_stride, filter_y, filter_y_stride,
1349 void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
1350 uint8_t *dst, ptrdiff_t dst_stride,
1351 const int16_t *filter_x,
1352 int filter_x_stride,
1353 const int16_t *filter_y,
1354 int filter_y_stride,
1356 vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
1357 filter_x_stride, filter_y, filter_y_stride,
1361 void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride,
1362 uint8_t *dst, ptrdiff_t dst_stride,
1363 const int16_t *filter_x,
1364 int filter_x_stride,
1365 const int16_t *filter_y,
1366 int filter_y_stride,
1368 vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x,
1369 filter_x_stride, filter_y, filter_y_stride,
1373 void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
1374 uint8_t *dst, ptrdiff_t dst_stride,
1375 const int16_t *filter_x,
1376 int filter_x_stride,
1377 const int16_t *filter_y,
1378 int filter_y_stride,
1380 vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
1381 filter_x_stride, filter_y, filter_y_stride,
1385 const ConvolveFunctions convolve8_c(
1386 wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
1387 wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
1388 wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
1389 INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
1390 make_tuple(4, 4, &convolve8_c),
1391 make_tuple(8, 4, &convolve8_c),
1392 make_tuple(4, 8, &convolve8_c),
1393 make_tuple(8, 8, &convolve8_c),
1394 make_tuple(16, 8, &convolve8_c),
1395 make_tuple(8, 16, &convolve8_c),
1396 make_tuple(16, 16, &convolve8_c),
1397 make_tuple(32, 16, &convolve8_c),
1398 make_tuple(16, 32, &convolve8_c),
1399 make_tuple(32, 32, &convolve8_c),
1400 make_tuple(64, 32, &convolve8_c),
1401 make_tuple(32, 64, &convolve8_c),
1402 make_tuple(64, 64, &convolve8_c)));
1403 const ConvolveFunctions convolve10_c(
1404 wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
1405 wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
1406 wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
1407 INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
1408 make_tuple(4, 4, &convolve10_c),
1409 make_tuple(8, 4, &convolve10_c),
1410 make_tuple(4, 8, &convolve10_c),
1411 make_tuple(8, 8, &convolve10_c),
1412 make_tuple(16, 8, &convolve10_c),
1413 make_tuple(8, 16, &convolve10_c),
1414 make_tuple(16, 16, &convolve10_c),
1415 make_tuple(32, 16, &convolve10_c),
1416 make_tuple(16, 32, &convolve10_c),
1417 make_tuple(32, 32, &convolve10_c),
1418 make_tuple(64, 32, &convolve10_c),
1419 make_tuple(32, 64, &convolve10_c),
1420 make_tuple(64, 64, &convolve10_c)));
1421 const ConvolveFunctions convolve12_c(
1422 wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
1423 wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
1424 wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
1425 INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
1426 make_tuple(4, 4, &convolve12_c),
1427 make_tuple(8, 4, &convolve12_c),
1428 make_tuple(4, 8, &convolve12_c),
1429 make_tuple(8, 8, &convolve12_c),
1430 make_tuple(16, 8, &convolve12_c),
1431 make_tuple(8, 16, &convolve12_c),
1432 make_tuple(16, 16, &convolve12_c),
1433 make_tuple(32, 16, &convolve12_c),
1434 make_tuple(16, 32, &convolve12_c),
1435 make_tuple(32, 32, &convolve12_c),
1436 make_tuple(64, 32, &convolve12_c),
1437 make_tuple(32, 64, &convolve12_c),
1438 make_tuple(64, 64, &convolve12_c)));
1442 const ConvolveFunctions convolve8_c(
1443 vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
1444 vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
1445 vp9_convolve8_c, vp9_convolve8_avg_c, 0);
1447 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
1448 make_tuple(4, 4, &convolve8_c),
1449 make_tuple(8, 4, &convolve8_c),
1450 make_tuple(4, 8, &convolve8_c),
1451 make_tuple(8, 8, &convolve8_c),
1452 make_tuple(16, 8, &convolve8_c),
1453 make_tuple(8, 16, &convolve8_c),
1454 make_tuple(16, 16, &convolve8_c),
1455 make_tuple(32, 16, &convolve8_c),
1456 make_tuple(16, 32, &convolve8_c),
1457 make_tuple(32, 32, &convolve8_c),
1458 make_tuple(64, 32, &convolve8_c),
1459 make_tuple(32, 64, &convolve8_c),
1460 make_tuple(64, 64, &convolve8_c)));
1463 #if HAVE_SSE2 && ARCH_X86_64
1464 #if CONFIG_VP9_HIGHBITDEPTH
1465 const ConvolveFunctions convolve8_sse2(
1466 wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
1467 wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
1468 wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
1469 INSTANTIATE_TEST_CASE_P(SSE2_8, ConvolveTest, ::testing::Values(
1470 make_tuple(4, 4, &convolve8_sse2),
1471 make_tuple(8, 4, &convolve8_sse2),
1472 make_tuple(4, 8, &convolve8_sse2),
1473 make_tuple(8, 8, &convolve8_sse2),
1474 make_tuple(16, 8, &convolve8_sse2),
1475 make_tuple(8, 16, &convolve8_sse2),
1476 make_tuple(16, 16, &convolve8_sse2),
1477 make_tuple(32, 16, &convolve8_sse2),
1478 make_tuple(16, 32, &convolve8_sse2),
1479 make_tuple(32, 32, &convolve8_sse2),
1480 make_tuple(64, 32, &convolve8_sse2),
1481 make_tuple(32, 64, &convolve8_sse2),
1482 make_tuple(64, 64, &convolve8_sse2)));
1483 const ConvolveFunctions convolve10_sse2(
1484 wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
1485 wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
1486 wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
1487 INSTANTIATE_TEST_CASE_P(SSE2_10, ConvolveTest, ::testing::Values(
1488 make_tuple(4, 4, &convolve10_sse2),
1489 make_tuple(8, 4, &convolve10_sse2),
1490 make_tuple(4, 8, &convolve10_sse2),
1491 make_tuple(8, 8, &convolve10_sse2),
1492 make_tuple(16, 8, &convolve10_sse2),
1493 make_tuple(8, 16, &convolve10_sse2),
1494 make_tuple(16, 16, &convolve10_sse2),
1495 make_tuple(32, 16, &convolve10_sse2),
1496 make_tuple(16, 32, &convolve10_sse2),
1497 make_tuple(32, 32, &convolve10_sse2),
1498 make_tuple(64, 32, &convolve10_sse2),
1499 make_tuple(32, 64, &convolve10_sse2),
1500 make_tuple(64, 64, &convolve10_sse2)));
1501 const ConvolveFunctions convolve12_sse2(
1502 wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
1503 wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
1504 wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
1505 INSTANTIATE_TEST_CASE_P(SSE2_12, ConvolveTest, ::testing::Values(
1506 make_tuple(4, 4, &convolve12_sse2),
1507 make_tuple(8, 4, &convolve12_sse2),
1508 make_tuple(4, 8, &convolve12_sse2),
1509 make_tuple(8, 8, &convolve12_sse2),
1510 make_tuple(16, 8, &convolve12_sse2),
1511 make_tuple(8, 16, &convolve12_sse2),
1512 make_tuple(16, 16, &convolve12_sse2),
1513 make_tuple(32, 16, &convolve12_sse2),
1514 make_tuple(16, 32, &convolve12_sse2),
1515 make_tuple(32, 32, &convolve12_sse2),
1516 make_tuple(64, 32, &convolve12_sse2),
1517 make_tuple(32, 64, &convolve12_sse2),
1518 make_tuple(64, 64, &convolve12_sse2)));
1520 const ConvolveFunctions convolve8_sse2(
1521 vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2,
1522 vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2,
1523 vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0);
1525 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
1526 make_tuple(4, 4, &convolve8_sse2),
1527 make_tuple(8, 4, &convolve8_sse2),
1528 make_tuple(4, 8, &convolve8_sse2),
1529 make_tuple(8, 8, &convolve8_sse2),
1530 make_tuple(16, 8, &convolve8_sse2),
1531 make_tuple(8, 16, &convolve8_sse2),
1532 make_tuple(16, 16, &convolve8_sse2),
1533 make_tuple(32, 16, &convolve8_sse2),
1534 make_tuple(16, 32, &convolve8_sse2),
1535 make_tuple(32, 32, &convolve8_sse2),
1536 make_tuple(64, 32, &convolve8_sse2),
1537 make_tuple(32, 64, &convolve8_sse2),
1538 make_tuple(64, 64, &convolve8_sse2)));
1539 #endif // CONFIG_VP9_HIGHBITDEPTH
1543 const ConvolveFunctions convolve8_ssse3(
1544 vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
1545 vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
1546 vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0);
1548 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
1549 make_tuple(4, 4, &convolve8_ssse3),
1550 make_tuple(8, 4, &convolve8_ssse3),
1551 make_tuple(4, 8, &convolve8_ssse3),
1552 make_tuple(8, 8, &convolve8_ssse3),
1553 make_tuple(16, 8, &convolve8_ssse3),
1554 make_tuple(8, 16, &convolve8_ssse3),
1555 make_tuple(16, 16, &convolve8_ssse3),
1556 make_tuple(32, 16, &convolve8_ssse3),
1557 make_tuple(16, 32, &convolve8_ssse3),
1558 make_tuple(32, 32, &convolve8_ssse3),
1559 make_tuple(64, 32, &convolve8_ssse3),
1560 make_tuple(32, 64, &convolve8_ssse3),
1561 make_tuple(64, 64, &convolve8_ssse3)));
1564 #if HAVE_AVX2 && HAVE_SSSE3
1565 const ConvolveFunctions convolve8_avx2(
1566 vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
1567 vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
1568 vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0);
1570 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
1571 make_tuple(4, 4, &convolve8_avx2),
1572 make_tuple(8, 4, &convolve8_avx2),
1573 make_tuple(4, 8, &convolve8_avx2),
1574 make_tuple(8, 8, &convolve8_avx2),
1575 make_tuple(8, 16, &convolve8_avx2),
1576 make_tuple(16, 8, &convolve8_avx2),
1577 make_tuple(16, 16, &convolve8_avx2),
1578 make_tuple(32, 16, &convolve8_avx2),
1579 make_tuple(16, 32, &convolve8_avx2),
1580 make_tuple(32, 32, &convolve8_avx2),
1581 make_tuple(64, 32, &convolve8_avx2),
1582 make_tuple(32, 64, &convolve8_avx2),
1583 make_tuple(64, 64, &convolve8_avx2)));
1584 #endif // HAVE_AVX2 && HAVE_SSSE3
1587 const ConvolveFunctions convolve8_neon(
1588 vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
1589 vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
1590 vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
1592 INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
1593 make_tuple(4, 4, &convolve8_neon),
1594 make_tuple(8, 4, &convolve8_neon),
1595 make_tuple(4, 8, &convolve8_neon),
1596 make_tuple(8, 8, &convolve8_neon),
1597 make_tuple(16, 8, &convolve8_neon),
1598 make_tuple(8, 16, &convolve8_neon),
1599 make_tuple(16, 16, &convolve8_neon),
1600 make_tuple(32, 16, &convolve8_neon),
1601 make_tuple(16, 32, &convolve8_neon),
1602 make_tuple(32, 32, &convolve8_neon),
1603 make_tuple(64, 32, &convolve8_neon),
1604 make_tuple(32, 64, &convolve8_neon),
1605 make_tuple(64, 64, &convolve8_neon)));
1609 const ConvolveFunctions convolve8_dspr2(
1610 vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2,
1611 vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2,
1612 vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0);
1614 INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
1615 make_tuple(4, 4, &convolve8_dspr2),
1616 make_tuple(8, 4, &convolve8_dspr2),
1617 make_tuple(4, 8, &convolve8_dspr2),
1618 make_tuple(8, 8, &convolve8_dspr2),
1619 make_tuple(16, 8, &convolve8_dspr2),
1620 make_tuple(8, 16, &convolve8_dspr2),
1621 make_tuple(16, 16, &convolve8_dspr2),
1622 make_tuple(32, 16, &convolve8_dspr2),
1623 make_tuple(16, 32, &convolve8_dspr2),
1624 make_tuple(32, 32, &convolve8_dspr2),
1625 make_tuple(64, 32, &convolve8_dspr2),
1626 make_tuple(32, 64, &convolve8_dspr2),
1627 make_tuple(64, 64, &convolve8_dspr2)));