Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / third_party / libvpx / source / libvpx / test / convolve_test.cc
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11 #include <string.h>
12 #include "test/acm_random.h"
13 #include "test/register_state_check.h"
14 #include "test/util.h"
15 #include "third_party/googletest/src/include/gtest/gtest.h"
16
17 #include "./vpx_config.h"
18 #include "./vp9_rtcd.h"
19 #include "vp9/common/vp9_filter.h"
20 #include "vpx_mem/vpx_mem.h"
21 #include "vpx_ports/mem.h"
22
23 namespace {
24
// Largest block width/height, in pixels, that the reference filters in this
// file accept; their scratch buffers are sized from it.
const unsigned int kMaxDimension = 64u;
26
// Signature shared by every convolution kernel exercised in this file:
// source pixels and stride, destination pixels and stride, the x filter with
// its stride argument, the y filter with its stride argument, and finally the
// block width and height.
typedef void (*ConvolveFunc)(
    const uint8_t *src, ptrdiff_t src_stride,
    uint8_t *dst, ptrdiff_t dst_stride,
    const int16_t *filter_x, int filter_x_stride,
    const int16_t *filter_y, int filter_y_stride,
    int w, int h);
32
33 struct ConvolveFunctions {
34   ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg,
35                     ConvolveFunc v8, ConvolveFunc v8_avg,
36                     ConvolveFunc hv8, ConvolveFunc hv8_avg,
37                     int bd)
38       : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg),
39         hv8_avg_(hv8_avg), use_high_bd_(bd) {}
40
41   ConvolveFunc h8_;
42   ConvolveFunc v8_;
43   ConvolveFunc hv8_;
44   ConvolveFunc h8_avg_;
45   ConvolveFunc v8_avg_;
46   ConvolveFunc hv8_avg_;
47   int use_high_bd_;  // 0 if high bitdepth not used, else the actual bit depth.
48 };
49
50 typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
51
// Reference 8-tap subpixel filter, slightly modified to fit into this test.

// Filter sums are rounded with (VP9_FILTER_WEIGHT >> 1) and then scaled back
// down by VP9_FILTER_SHIFT bits.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7

// Clamps x to the 8-bit pixel range [0, 255].
uint8_t clip_pixel(int x) {
  if (x < 0) {
    return 0;
  }
  if (x > 255) {
    return 255;
  }
  return static_cast<uint8_t>(x);
}
60
61 void filter_block2d_8_c(const uint8_t *src_ptr,
62                         const unsigned int src_stride,
63                         const int16_t *HFilter,
64                         const int16_t *VFilter,
65                         uint8_t *dst_ptr,
66                         unsigned int dst_stride,
67                         unsigned int output_width,
68                         unsigned int output_height) {
69   // Between passes, we use an intermediate buffer whose height is extended to
70   // have enough horizontally filtered values as input for the vertical pass.
71   // This buffer is allocated to be big enough for the largest block type we
72   // support.
73   const int kInterp_Extend = 4;
74   const unsigned int intermediate_height =
75       (kInterp_Extend - 1) + output_height + kInterp_Extend;
76   unsigned int i, j;
77
78   // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
79   // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
80   //                                 + kInterp_Extend
81   //                               = 3 + 16 + 4
82   //                               = 23
83   // and filter_max_width          = 16
84   //
85   uint8_t intermediate_buffer[71 * kMaxDimension];
86   const int intermediate_next_stride = 1 - intermediate_height * output_width;
87
88   // Horizontal pass (src -> transposed intermediate).
89   uint8_t *output_ptr = intermediate_buffer;
90   const int src_next_row_stride = src_stride - output_width;
91   src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
92   for (i = 0; i < intermediate_height; ++i) {
93     for (j = 0; j < output_width; ++j) {
94       // Apply filter...
95       const int temp = (src_ptr[0] * HFilter[0]) +
96           (src_ptr[1] * HFilter[1]) +
97           (src_ptr[2] * HFilter[2]) +
98           (src_ptr[3] * HFilter[3]) +
99           (src_ptr[4] * HFilter[4]) +
100           (src_ptr[5] * HFilter[5]) +
101           (src_ptr[6] * HFilter[6]) +
102           (src_ptr[7] * HFilter[7]) +
103           (VP9_FILTER_WEIGHT >> 1);  // Rounding
104
105       // Normalize back to 0-255...
106       *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
107       ++src_ptr;
108       output_ptr += intermediate_height;
109     }
110     src_ptr += src_next_row_stride;
111     output_ptr += intermediate_next_stride;
112   }
113
114   // Vertical pass (transposed intermediate -> dst).
115   src_ptr = intermediate_buffer;
116   const int dst_next_row_stride = dst_stride - output_width;
117   for (i = 0; i < output_height; ++i) {
118     for (j = 0; j < output_width; ++j) {
119       // Apply filter...
120       const int temp = (src_ptr[0] * VFilter[0]) +
121           (src_ptr[1] * VFilter[1]) +
122           (src_ptr[2] * VFilter[2]) +
123           (src_ptr[3] * VFilter[3]) +
124           (src_ptr[4] * VFilter[4]) +
125           (src_ptr[5] * VFilter[5]) +
126           (src_ptr[6] * VFilter[6]) +
127           (src_ptr[7] * VFilter[7]) +
128           (VP9_FILTER_WEIGHT >> 1);  // Rounding
129
130       // Normalize back to 0-255...
131       *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
132       src_ptr += intermediate_height;
133     }
134     src_ptr += intermediate_next_stride;
135     dst_ptr += dst_next_row_stride;
136   }
137 }
138
// Averages src into output_ptr in place, rounding up:
//   output[y][x] = (output[y][x] + src[y][x] + 1) >> 1
// for an output_width x output_height block. src is only read; src_stride and
// output_stride are the row pitches of the two buffers, in pixels.
void block2d_average_c(const uint8_t *src,
                       unsigned int src_stride,
                       uint8_t *output_ptr,
                       unsigned int output_stride,
                       unsigned int output_width,
                       unsigned int output_height) {
  for (unsigned int i = 0; i < output_height; ++i) {
    const uint8_t *const src_row = src + i * src_stride;
    for (unsigned int j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src_row[j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}
153
154 void filter_average_block2d_8_c(const uint8_t *src_ptr,
155                                 const unsigned int src_stride,
156                                 const int16_t *HFilter,
157                                 const int16_t *VFilter,
158                                 uint8_t *dst_ptr,
159                                 unsigned int dst_stride,
160                                 unsigned int output_width,
161                                 unsigned int output_height) {
162   uint8_t tmp[kMaxDimension * kMaxDimension];
163
164   assert(output_width <= kMaxDimension);
165   assert(output_height <= kMaxDimension);
166   filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
167                      output_width, output_height);
168   block2d_average_c(tmp, 64, dst_ptr, dst_stride,
169                     output_width, output_height);
170 }
171
172 #if CONFIG_VP9_HIGHBITDEPTH
173 void high_filter_block2d_8_c(const uint16_t *src_ptr,
174                              const unsigned int src_stride,
175                              const int16_t *HFilter,
176                              const int16_t *VFilter,
177                              uint16_t *dst_ptr,
178                              unsigned int dst_stride,
179                              unsigned int output_width,
180                              unsigned int output_height,
181                              int bd) {
182   // Between passes, we use an intermediate buffer whose height is extended to
183   // have enough horizontally filtered values as input for the vertical pass.
184   // This buffer is allocated to be big enough for the largest block type we
185   // support.
186   const int kInterp_Extend = 4;
187   const unsigned int intermediate_height =
188       (kInterp_Extend - 1) + output_height + kInterp_Extend;
189
190   /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
191    * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
192    *                                 + kInterp_Extend
193    *                               = 3 + 16 + 4
194    *                               = 23
195    * and filter_max_width = 16
196    */
197   uint16_t intermediate_buffer[71 * kMaxDimension];
198   const int intermediate_next_stride = 1 - intermediate_height * output_width;
199
200   // Horizontal pass (src -> transposed intermediate).
201   {
202     uint16_t *output_ptr = intermediate_buffer;
203     const int src_next_row_stride = src_stride - output_width;
204     unsigned int i, j;
205     src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
206     for (i = 0; i < intermediate_height; ++i) {
207       for (j = 0; j < output_width; ++j) {
208         // Apply filter...
209         const int temp = (src_ptr[0] * HFilter[0]) +
210                          (src_ptr[1] * HFilter[1]) +
211                          (src_ptr[2] * HFilter[2]) +
212                          (src_ptr[3] * HFilter[3]) +
213                          (src_ptr[4] * HFilter[4]) +
214                          (src_ptr[5] * HFilter[5]) +
215                          (src_ptr[6] * HFilter[6]) +
216                          (src_ptr[7] * HFilter[7]) +
217                          (VP9_FILTER_WEIGHT >> 1);  // Rounding
218
219         // Normalize back to 0-255...
220         *output_ptr = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
221         ++src_ptr;
222         output_ptr += intermediate_height;
223       }
224       src_ptr += src_next_row_stride;
225       output_ptr += intermediate_next_stride;
226     }
227   }
228
229   // Vertical pass (transposed intermediate -> dst).
230   {
231     uint16_t *src_ptr = intermediate_buffer;
232     const int dst_next_row_stride = dst_stride - output_width;
233     unsigned int i, j;
234     for (i = 0; i < output_height; ++i) {
235       for (j = 0; j < output_width; ++j) {
236         // Apply filter...
237         const int temp = (src_ptr[0] * VFilter[0]) +
238                          (src_ptr[1] * VFilter[1]) +
239                          (src_ptr[2] * VFilter[2]) +
240                          (src_ptr[3] * VFilter[3]) +
241                          (src_ptr[4] * VFilter[4]) +
242                          (src_ptr[5] * VFilter[5]) +
243                          (src_ptr[6] * VFilter[6]) +
244                          (src_ptr[7] * VFilter[7]) +
245                          (VP9_FILTER_WEIGHT >> 1);  // Rounding
246
247         // Normalize back to 0-255...
248         *dst_ptr++ = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
249         src_ptr += intermediate_height;
250       }
251       src_ptr += intermediate_next_stride;
252       dst_ptr += dst_next_row_stride;
253     }
254   }
255 }
256
// High-bitdepth counterpart of block2d_average_c(): averages src into
// output_ptr in place, rounding up:
//   output[y][x] = (output[y][x] + src[y][x] + 1) >> 1
// for an output_width x output_height block. src is only read. bd is accepted
// so the signature parallels the other high-bitdepth helpers, but it is not
// used by the computation.
void high_block2d_average_c(const uint16_t *src,
                            unsigned int src_stride,
                            uint16_t *output_ptr,
                            unsigned int output_stride,
                            unsigned int output_width,
                            unsigned int output_height,
                            int bd) {
  (void)bd;  // Unused.
  for (unsigned int i = 0; i < output_height; ++i) {
    const uint16_t *const src_row = src + i * src_stride;
    for (unsigned int j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src_row[j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}
272
273 void high_filter_average_block2d_8_c(const uint16_t *src_ptr,
274                                      const unsigned int src_stride,
275                                      const int16_t *HFilter,
276                                      const int16_t *VFilter,
277                                      uint16_t *dst_ptr,
278                                      unsigned int dst_stride,
279                                      unsigned int output_width,
280                                      unsigned int output_height,
281                                      int bd) {
282   uint16_t tmp[kMaxDimension * kMaxDimension];
283
284   assert(output_width <= kMaxDimension);
285   assert(output_height <= kMaxDimension);
286   high_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
287                           output_width, output_height, bd);
288   high_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
289                          output_width, output_height, bd);
290 }
291 #endif  // CONFIG_VP9_HIGHBITDEPTH
292
293 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
294  public:
295   static void SetUpTestCase() {
296     // Force input_ to be unaligned, output to be 16 byte aligned.
297     input_ = reinterpret_cast<uint8_t*>(
298         vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
299     output_ = reinterpret_cast<uint8_t*>(
300         vpx_memalign(kDataAlignment, kOutputBufferSize));
301 #if CONFIG_VP9_HIGHBITDEPTH
302     input16_ = reinterpret_cast<uint16_t*>(
303         vpx_memalign(kDataAlignment,
304                      (kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
305     output16_ = reinterpret_cast<uint16_t*>(
306         vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
307 #endif
308   }
309
310   static void TearDownTestCase() {
311     vpx_free(input_ - 1);
312     input_ = NULL;
313     vpx_free(output_);
314     output_ = NULL;
315 #if CONFIG_VP9_HIGHBITDEPTH
316     vpx_free(input16_ - 1);
317     input16_ = NULL;
318     vpx_free(output16_);
319     output16_ = NULL;
320 #endif
321   }
322
323  protected:
324   static const int kDataAlignment = 16;
325   static const int kOuterBlockSize = 256;
326   static const int kInputStride = kOuterBlockSize;
327   static const int kOutputStride = kOuterBlockSize;
328   static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
329   static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
330
331   int Width() const { return GET_PARAM(0); }
332   int Height() const { return GET_PARAM(1); }
333   int BorderLeft() const {
334     const int center = (kOuterBlockSize - Width()) / 2;
335     return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
336   }
337   int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
338
339   bool IsIndexInBorder(int i) {
340     return (i < BorderTop() * kOuterBlockSize ||
341             i >= (BorderTop() + Height()) * kOuterBlockSize ||
342             i % kOuterBlockSize < BorderLeft() ||
343             i % kOuterBlockSize >= (BorderLeft() + Width()));
344   }
345
346   virtual void SetUp() {
347     UUT_ = GET_PARAM(2);
348 #if CONFIG_VP9_HIGHBITDEPTH
349     if (UUT_->use_high_bd_ != 0)
350       mask_ = (1 << UUT_->use_high_bd_) - 1;
351     else
352       mask_ = 255;
353 #endif
354     /* Set up guard blocks for an inner block centered in the outer block */
355     for (int i = 0; i < kOutputBufferSize; ++i) {
356       if (IsIndexInBorder(i))
357         output_[i] = 255;
358       else
359         output_[i] = 0;
360     }
361
362     ::libvpx_test::ACMRandom prng;
363     for (int i = 0; i < kInputBufferSize; ++i) {
364       if (i & 1) {
365         input_[i] = 255;
366 #if CONFIG_VP9_HIGHBITDEPTH
367         input16_[i] = mask_;
368 #endif
369       } else {
370         input_[i] = prng.Rand8Extremes();
371 #if CONFIG_VP9_HIGHBITDEPTH
372         input16_[i] = prng.Rand16() & mask_;
373 #endif
374       }
375     }
376   }
377
378   void SetConstantInput(int value) {
379     memset(input_, value, kInputBufferSize);
380 #if CONFIG_VP9_HIGHBITDEPTH
381     vpx_memset16(input16_, value, kInputBufferSize);
382 #endif
383   }
384
385   void CheckGuardBlocks() {
386     for (int i = 0; i < kOutputBufferSize; ++i) {
387       if (IsIndexInBorder(i))
388         EXPECT_EQ(255, output_[i]);
389     }
390   }
391
392   uint8_t *input() const {
393 #if CONFIG_VP9_HIGHBITDEPTH
394     if (UUT_->use_high_bd_ == 0) {
395       return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
396     } else {
397       return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize +
398                                 BorderLeft());
399     }
400 #else
401     return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
402 #endif
403   }
404
405   uint8_t *output() const {
406 #if CONFIG_VP9_HIGHBITDEPTH
407     if (UUT_->use_high_bd_ == 0) {
408       return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
409     } else {
410       return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize +
411                                 BorderLeft());
412     }
413 #else
414     return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
415 #endif
416   }
417
418   uint16_t lookup(uint8_t *list, int index) const {
419 #if CONFIG_VP9_HIGHBITDEPTH
420     if (UUT_->use_high_bd_ == 0) {
421       return list[index];
422     } else {
423       return CONVERT_TO_SHORTPTR(list)[index];
424     }
425 #else
426     return list[index];
427 #endif
428   }
429
430   void assign_val(uint8_t *list, int index, uint16_t val) const {
431 #if CONFIG_VP9_HIGHBITDEPTH
432     if (UUT_->use_high_bd_ == 0) {
433       list[index] = (uint8_t) val;
434     } else {
435       CONVERT_TO_SHORTPTR(list)[index] = val;
436     }
437 #else
438     list[index] = (uint8_t) val;
439 #endif
440   }
441
442   void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr,
443                                           const unsigned int src_stride,
444                                           const int16_t *HFilter,
445                                           const int16_t *VFilter,
446                                           uint8_t *dst_ptr,
447                                           unsigned int dst_stride,
448                                           unsigned int output_width,
449                                           unsigned int output_height) {
450 #if CONFIG_VP9_HIGHBITDEPTH
451     if (UUT_->use_high_bd_ == 0) {
452       filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
453                                  dst_ptr, dst_stride, output_width,
454                                  output_height);
455     } else {
456       high_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
457                                       HFilter, VFilter,
458                                       CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
459                                       output_width, output_height,
460                                       UUT_->use_high_bd_);
461     }
462 #else
463     filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
464                                dst_ptr, dst_stride, output_width,
465                                output_height);
466 #endif
467   }
468
469   void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
470                                   const unsigned int src_stride,
471                                   const int16_t *HFilter,
472                                   const int16_t *VFilter,
473                                   uint8_t *dst_ptr,
474                                   unsigned int dst_stride,
475                                   unsigned int output_width,
476                                   unsigned int output_height) {
477 #if CONFIG_VP9_HIGHBITDEPTH
478     if (UUT_->use_high_bd_ == 0) {
479       filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
480                          dst_ptr, dst_stride, output_width, output_height);
481     } else {
482       high_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
483                               HFilter, VFilter,
484                               CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
485                               output_width, output_height, UUT_->use_high_bd_);
486     }
487 #else
488     filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
489                        dst_ptr, dst_stride, output_width, output_height);
490 #endif
491   }
492
493   const ConvolveFunctions* UUT_;
494   static uint8_t* input_;
495   static uint8_t* output_;
496 #if CONFIG_VP9_HIGHBITDEPTH
497   static uint16_t* input16_;
498   static uint16_t* output16_;
499   int mask_;
500 #endif
501 };
502
503 uint8_t* ConvolveTest::input_ = NULL;
504 uint8_t* ConvolveTest::output_ = NULL;
505 #if CONFIG_VP9_HIGHBITDEPTH
506 uint16_t* ConvolveTest::input16_ = NULL;
507 uint16_t* ConvolveTest::output16_ = NULL;
508 #endif
509
510 TEST_P(ConvolveTest, GuardBlocks) {
511   CheckGuardBlocks();
512 }
513
514 TEST_P(ConvolveTest, CopyHoriz) {
515   uint8_t* const in = input();
516   uint8_t* const out = output();
517   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
518
519   ASM_REGISTER_STATE_CHECK(
520       UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
521                 Width(), Height()));
522
523   CheckGuardBlocks();
524
525   for (int y = 0; y < Height(); ++y)
526     for (int x = 0; x < Width(); ++x)
527       ASSERT_EQ(lookup(out, y * kOutputStride + x),
528                 lookup(in, y * kInputStride + x))
529           << "(" << x << "," << y << ")";
530 }
531
532 TEST_P(ConvolveTest, CopyVert) {
533   uint8_t* const in = input();
534   uint8_t* const out = output();
535   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
536
537   ASM_REGISTER_STATE_CHECK(
538       UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
539                 Width(), Height()));
540
541   CheckGuardBlocks();
542
543   for (int y = 0; y < Height(); ++y)
544     for (int x = 0; x < Width(); ++x)
545       ASSERT_EQ(lookup(out, y * kOutputStride + x),
546                 lookup(in, y * kInputStride + x))
547           << "(" << x << "," << y << ")";
548 }
549
550 TEST_P(ConvolveTest, Copy2D) {
551   uint8_t* const in = input();
552   uint8_t* const out = output();
553   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
554
555   ASM_REGISTER_STATE_CHECK(
556       UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
557                  Width(), Height()));
558
559   CheckGuardBlocks();
560
561   for (int y = 0; y < Height(); ++y)
562     for (int x = 0; x < Width(); ++x)
563       ASSERT_EQ(lookup(out, y * kOutputStride + x),
564                 lookup(in, y * kInputStride + x))
565           << "(" << x << "," << y << ")";
566 }
567
// Number of filter banks the tests iterate over; each bank index is cast to
// INTERP_FILTER and looked up with vp9_get_interp_kernel().
const int kNumFilterBanks = 4;
// Number of kernels the tests index within each bank.
const int kNumFilters = 16;
570
571 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
572   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
573     const InterpKernel *filters =
574         vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
575     for (int i = 0; i < kNumFilters; i++) {
576       const int p0 = filters[i][0] + filters[i][1];
577       const int p1 = filters[i][2] + filters[i][3];
578       const int p2 = filters[i][4] + filters[i][5];
579       const int p3 = filters[i][6] + filters[i][7];
580       EXPECT_LE(p0, 128);
581       EXPECT_LE(p1, 128);
582       EXPECT_LE(p2, 128);
583       EXPECT_LE(p3, 128);
584       EXPECT_LE(p0 + p3, 128);
585       EXPECT_LE(p0 + p3 + p1, 128);
586       EXPECT_LE(p0 + p3 + p1 + p2, 128);
587       EXPECT_EQ(p0 + p1 + p2 + p3, 128);
588     }
589   }
590 }
591
// All-zero kernel handed to the single-direction kernels (h8_/v8_) in place of
// the filter for the direction they are not expected to apply.
const int16_t kInvalidFilter[8] = {0, 0, 0, 0, 0, 0, 0, 0};
593
594 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
595   uint8_t* const in = input();
596   uint8_t* const out = output();
597 #if CONFIG_VP9_HIGHBITDEPTH
598   uint8_t ref8[kOutputStride * kMaxDimension];
599   uint16_t ref16[kOutputStride * kMaxDimension];
600   uint8_t* ref;
601   if (UUT_->use_high_bd_ == 0) {
602     ref = ref8;
603   } else {
604     ref = CONVERT_TO_BYTEPTR(ref16);
605   }
606 #else
607   uint8_t ref[kOutputStride * kMaxDimension];
608 #endif
609
610   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
611     const InterpKernel *filters =
612         vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
613     const InterpKernel *const eighttap_smooth =
614         vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
615
616     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
617       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
618         wrapper_filter_block2d_8_c(in, kInputStride,
619                                    filters[filter_x], filters[filter_y],
620                                    ref, kOutputStride,
621                                    Width(), Height());
622
623         if (filters == eighttap_smooth || (filter_x && filter_y))
624           ASM_REGISTER_STATE_CHECK(
625               UUT_->hv8_(in, kInputStride, out, kOutputStride,
626                          filters[filter_x], 16, filters[filter_y], 16,
627                          Width(), Height()));
628         else if (filter_y)
629           ASM_REGISTER_STATE_CHECK(
630               UUT_->v8_(in, kInputStride, out, kOutputStride,
631                         kInvalidFilter, 16, filters[filter_y], 16,
632                         Width(), Height()));
633         else
634           ASM_REGISTER_STATE_CHECK(
635               UUT_->h8_(in, kInputStride, out, kOutputStride,
636                         filters[filter_x], 16, kInvalidFilter, 16,
637                         Width(), Height()));
638
639         CheckGuardBlocks();
640
641         for (int y = 0; y < Height(); ++y)
642           for (int x = 0; x < Width(); ++x)
643             ASSERT_EQ(lookup(ref, y * kOutputStride + x),
644                       lookup(out, y * kOutputStride + x))
645                 << "mismatch at (" << x << "," << y << "), "
646                 << "filters (" << filter_bank << ","
647                 << filter_x << "," << filter_y << ")";
648       }
649     }
650   }
651 }
652
653 TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
654   uint8_t* const in = input();
655   uint8_t* const out = output();
656 #if CONFIG_VP9_HIGHBITDEPTH
657   uint8_t ref8[kOutputStride * kMaxDimension];
658   uint16_t ref16[kOutputStride * kMaxDimension];
659   uint8_t* ref;
660   if (UUT_->use_high_bd_ == 0) {
661     ref = ref8;
662   } else {
663     ref = CONVERT_TO_BYTEPTR(ref16);
664   }
665 #else
666   uint8_t ref[kOutputStride * kMaxDimension];
667 #endif
668
669   // Populate ref and out with some random data
670   ::libvpx_test::ACMRandom prng;
671   for (int y = 0; y < Height(); ++y) {
672     for (int x = 0; x < Width(); ++x) {
673       uint16_t r;
674 #if CONFIG_VP9_HIGHBITDEPTH
675       if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) {
676         r = prng.Rand8Extremes();
677       } else {
678         r = prng.Rand16() & mask_;
679       }
680 #else
681       r = prng.Rand8Extremes();
682 #endif
683
684       assign_val(out, y * kOutputStride + x, r);
685       assign_val(ref, y * kOutputStride + x, r);
686     }
687   }
688
689   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
690     const InterpKernel *filters =
691         vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
692     const InterpKernel *const eighttap_smooth =
693         vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
694
695     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
696       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
697         wrapper_filter_average_block2d_8_c(in, kInputStride,
698                                            filters[filter_x], filters[filter_y],
699                                            ref, kOutputStride,
700                                            Width(), Height());
701
702         if (filters == eighttap_smooth || (filter_x && filter_y))
703           ASM_REGISTER_STATE_CHECK(
704               UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
705                              filters[filter_x], 16, filters[filter_y], 16,
706                              Width(), Height()));
707         else if (filter_y)
708           ASM_REGISTER_STATE_CHECK(
709               UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
710                             filters[filter_x], 16, filters[filter_y], 16,
711                             Width(), Height()));
712         else
713           ASM_REGISTER_STATE_CHECK(
714               UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
715                             filters[filter_x], 16, filters[filter_y], 16,
716                             Width(), Height()));
717
718         CheckGuardBlocks();
719
720         for (int y = 0; y < Height(); ++y)
721           for (int x = 0; x < Width(); ++x)
722             ASSERT_EQ(lookup(ref, y * kOutputStride + x),
723                       lookup(out, y * kOutputStride + x))
724                 << "mismatch at (" << x << "," << y << "), "
725                 << "filters (" << filter_bank << ","
726                 << filter_x << "," << filter_y << ")";
727       }
728     }
729   }
730 }
731
732 TEST_P(ConvolveTest, FilterExtremes) {
733   uint8_t *const in = input();
734   uint8_t *const out = output();
735 #if CONFIG_VP9_HIGHBITDEPTH
736   uint8_t ref8[kOutputStride * kMaxDimension];
737   uint16_t ref16[kOutputStride * kMaxDimension];
738   uint8_t *ref;
739   if (UUT_->use_high_bd_ == 0) {
740     ref = ref8;
741   } else {
742     ref = CONVERT_TO_BYTEPTR(ref16);
743   }
744 #else
745   uint8_t ref[kOutputStride * kMaxDimension];
746 #endif
747
748   // Populate ref and out with some random data
749   ::libvpx_test::ACMRandom prng;
750   for (int y = 0; y < Height(); ++y) {
751     for (int x = 0; x < Width(); ++x) {
752       uint16_t r;
753 #if CONFIG_VP9_HIGHBITDEPTH
754       if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) {
755         r = prng.Rand8Extremes();
756       } else {
757         r = prng.Rand16() & mask_;
758       }
759 #else
760       r = prng.Rand8Extremes();
761 #endif
762       assign_val(out, y * kOutputStride + x, r);
763       assign_val(ref, y * kOutputStride + x, r);
764     }
765   }
766
767   for (int axis = 0; axis < 2; axis++) {
768     int seed_val = 0;
769     while (seed_val < 256) {
770       for (int y = 0; y < 8; ++y) {
771         for (int x = 0; x < 8; ++x) {
772 #if CONFIG_VP9_HIGHBITDEPTH
773             assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
774                        ((seed_val >> (axis ? y : x)) & 1) * mask_);
775 #else
776             assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
777                        ((seed_val >> (axis ? y : x)) & 1) * 255);
778 #endif
779           if (axis) seed_val++;
780         }
781         if (axis)
782           seed_val-= 8;
783         else
784           seed_val++;
785       }
786       if (axis) seed_val += 8;
787
788       for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
789         const InterpKernel *filters =
790             vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
791         const InterpKernel *const eighttap_smooth =
792             vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
793         for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
794           for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
795             wrapper_filter_block2d_8_c(in, kInputStride,
796                                        filters[filter_x], filters[filter_y],
797                                        ref, kOutputStride,
798                                        Width(), Height());
799             if (filters == eighttap_smooth || (filter_x && filter_y))
800               ASM_REGISTER_STATE_CHECK(
801                   UUT_->hv8_(in, kInputStride, out, kOutputStride,
802                              filters[filter_x], 16, filters[filter_y], 16,
803                              Width(), Height()));
804             else if (filter_y)
805               ASM_REGISTER_STATE_CHECK(
806                   UUT_->v8_(in, kInputStride, out, kOutputStride,
807                             kInvalidFilter, 16, filters[filter_y], 16,
808                             Width(), Height()));
809             else
810               ASM_REGISTER_STATE_CHECK(
811                   UUT_->h8_(in, kInputStride, out, kOutputStride,
812                             filters[filter_x], 16, kInvalidFilter, 16,
813                             Width(), Height()));
814
815             for (int y = 0; y < Height(); ++y)
816               for (int x = 0; x < Width(); ++x)
817                 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
818                           lookup(out, y * kOutputStride + x))
819                     << "mismatch at (" << x << "," << y << "), "
820                     << "filters (" << filter_bank << ","
821                     << filter_x << "," << filter_y << ")";
822           }
823         }
824       }
825     }
826   }
827 }
828
829 DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
830     { 0,   0,   0,   0,   0,   0,   0, 128},
831     { 0,   0,   0,   0,   0,   0, 128},
832     { 0,   0,   0,   0,   0, 128},
833     { 0,   0,   0,   0, 128},
834     { 0,   0,   0, 128},
835     { 0,   0, 128},
836     { 0, 128},
837     { 128},
838     { 0,   0,   0,   0,   0,   0,   0, 128},
839     { 0,   0,   0,   0,   0,   0, 128},
840     { 0,   0,   0,   0,   0, 128},
841     { 0,   0,   0,   0, 128},
842     { 0,   0,   0, 128},
843     { 0,   0, 128},
844     { 0, 128},
845     { 128}
846 };
847
848 /* This test exercises the horizontal and vertical filter functions. */
849 TEST_P(ConvolveTest, ChangeFilterWorks) {
850   uint8_t* const in = input();
851   uint8_t* const out = output();
852
853   /* Assume that the first input sample is at the 8/16th position. */
854   const int kInitialSubPelOffset = 8;
855
856   /* Filters are 8-tap, so the first filter tap will be applied to the pixel
857    * at position -3 with respect to the current filtering position. Since
858    * kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
859    * which is non-zero only in the last tap. So, applying the filter at the
860    * current input position will result in an output equal to the pixel at
861    * offset +4 (-3 + 7) with respect to the current filtering position.
862    */
863   const int kPixelSelected = 4;
864
865   /* Assume that each output pixel requires us to step on by 17/16th pixels in
866    * the input.
867    */
868   const int kInputPixelStep = 17;
869
870   /* The filters are setup in such a way that the expected output produces
871    * sets of 8 identical output samples. As the filter position moves to the
872    * next 1/16th pixel position the only active (=128) filter tap moves one
873    * position to the left, resulting in the same input pixel being replicated
874    * in to the output for 8 consecutive samples. After each set of 8 positions
875    * the filters select a different input pixel. kFilterPeriodAdjust below
876    * computes which input pixel is written to the output for a specified
877    * x or y position.
878    */
879
880   /* Test the horizontal filter. */
881   ASM_REGISTER_STATE_CHECK(
882       UUT_->h8_(in, kInputStride, out, kOutputStride,
883                 kChangeFilters[kInitialSubPelOffset],
884                 kInputPixelStep, NULL, 0, Width(), Height()));
885
886   for (int x = 0; x < Width(); ++x) {
887     const int kFilterPeriodAdjust = (x >> 3) << 3;
888     const int ref_x =
889         kPixelSelected + ((kInitialSubPelOffset
890             + kFilterPeriodAdjust * kInputPixelStep)
891                           >> SUBPEL_BITS);
892     ASSERT_EQ(lookup(in, ref_x), lookup(out, x))
893         << "x == " << x << "width = " << Width();
894   }
895
896   /* Test the vertical filter. */
897   ASM_REGISTER_STATE_CHECK(
898       UUT_->v8_(in, kInputStride, out, kOutputStride,
899                 NULL, 0, kChangeFilters[kInitialSubPelOffset],
900                 kInputPixelStep, Width(), Height()));
901
902   for (int y = 0; y < Height(); ++y) {
903     const int kFilterPeriodAdjust = (y >> 3) << 3;
904     const int ref_y =
905         kPixelSelected + ((kInitialSubPelOffset
906             + kFilterPeriodAdjust * kInputPixelStep)
907                           >> SUBPEL_BITS);
908     ASSERT_EQ(lookup(in, ref_y * kInputStride), lookup(out, y * kInputStride))
909         << "y == " << y;
910   }
911
912   /* Test the horizontal and vertical filters in combination. */
913   ASM_REGISTER_STATE_CHECK(
914       UUT_->hv8_(in, kInputStride, out, kOutputStride,
915                  kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
916                  kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
917                  Width(), Height()));
918
919   for (int y = 0; y < Height(); ++y) {
920     const int kFilterPeriodAdjustY = (y >> 3) << 3;
921     const int ref_y =
922         kPixelSelected + ((kInitialSubPelOffset
923             + kFilterPeriodAdjustY * kInputPixelStep)
924                           >> SUBPEL_BITS);
925     for (int x = 0; x < Width(); ++x) {
926       const int kFilterPeriodAdjustX = (x >> 3) << 3;
927       const int ref_x =
928           kPixelSelected + ((kInitialSubPelOffset
929               + kFilterPeriodAdjustX * kInputPixelStep)
930                             >> SUBPEL_BITS);
931
932       ASSERT_EQ(lookup(in, ref_y * kInputStride + ref_x),
933                 lookup(out, y * kOutputStride + x))
934           << "x == " << x << ", y == " << y;
935     }
936   }
937 }
938
939 /* This test exercises that enough rows and columns are filtered with every
940    possible initial fractional positions and scaling steps. */
941 TEST_P(ConvolveTest, CheckScalingFiltering) {
942   uint8_t* const in = input();
943   uint8_t* const out = output();
944   const InterpKernel *const eighttap = vp9_get_interp_kernel(EIGHTTAP);
945
946   SetConstantInput(127);
947
948   for (int frac = 0; frac < 16; ++frac) {
949     for (int step = 1; step <= 32; ++step) {
950       /* Test the horizontal and vertical filters in combination. */
951       ASM_REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
952                                           eighttap[frac], step,
953                                           eighttap[frac], step,
954                                           Width(), Height()));
955
956       CheckGuardBlocks();
957
958       for (int y = 0; y < Height(); ++y) {
959         for (int x = 0; x < Width(); ++x) {
960           ASSERT_EQ(lookup(in, y * kInputStride + x),
961                     lookup(out, y * kOutputStride + x))
962               << "x == " << x << ", y == " << y
963               << ", frac == " << frac << ", step == " << step;
964         }
965       }
966     }
967   }
968 }
969
970 using std::tr1::make_tuple;
971
972 #if CONFIG_VP9_HIGHBITDEPTH
973 #if HAVE_SSE2 && ARCH_X86_64
974 void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
975                                  uint8_t *dst, ptrdiff_t dst_stride,
976                                  const int16_t *filter_x,
977                                  int filter_x_stride,
978                                  const int16_t *filter_y,
979                                  int filter_y_stride,
980                                  int w, int h) {
981   vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
982                                 filter_x_stride, filter_y, filter_y_stride,
983                                 w, h, 8);
984 }
985
986 void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
987                                      uint8_t *dst, ptrdiff_t dst_stride,
988                                      const int16_t *filter_x,
989                                      int filter_x_stride,
990                                      const int16_t *filter_y,
991                                      int filter_y_stride,
992                                      int w, int h) {
993   vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
994     filter_x_stride, filter_y, filter_y_stride, w, h, 8);
995 }
996
997 void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
998                                 uint8_t *dst, ptrdiff_t dst_stride,
999                                 const int16_t *filter_x,
1000                                 int filter_x_stride,
1001                                 const int16_t *filter_y,
1002                                 int filter_y_stride,
1003                                 int w, int h) {
1004   vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1005     filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1006 }
1007
1008 void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1009                                     uint8_t *dst, ptrdiff_t dst_stride,
1010                                     const int16_t *filter_x,
1011                                     int filter_x_stride,
1012                                     const int16_t *filter_y,
1013                                     int filter_y_stride,
1014                                     int w, int h) {
1015   vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1016                                    filter_x_stride, filter_y, filter_y_stride,
1017                                    w, h, 8);
1018 }
1019
1020 void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1021                            uint8_t *dst, ptrdiff_t dst_stride,
1022                            const int16_t *filter_x,
1023                            int filter_x_stride,
1024                            const int16_t *filter_y,
1025                            int filter_y_stride,
1026                            int w, int h) {
1027   vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
1028     filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1029 }
1030
1031 void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1032                                uint8_t *dst, ptrdiff_t dst_stride,
1033                                const int16_t *filter_x,
1034                                int filter_x_stride,
1035                                const int16_t *filter_y,
1036                                int filter_y_stride,
1037                                int w, int h) {
1038   vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x,
1039     filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1040 }
1041
1042 void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1043                                   uint8_t *dst, ptrdiff_t dst_stride,
1044                                   const int16_t *filter_x,
1045                                   int filter_x_stride,
1046                                   const int16_t *filter_y,
1047                                   int filter_y_stride,
1048                                   int w, int h) {
1049   vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1050     filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1051 }
1052
1053 void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1054                                       uint8_t *dst, ptrdiff_t dst_stride,
1055                                       const int16_t *filter_x,
1056                                       int filter_x_stride,
1057                                       const int16_t *filter_y,
1058                                       int filter_y_stride,
1059                                       int w, int h) {
1060   vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1061     filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1062 }
1063
1064 void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1065                                  uint8_t *dst, ptrdiff_t dst_stride,
1066                                  const int16_t *filter_x,
1067                                  int filter_x_stride,
1068                                  const int16_t *filter_y,
1069                                  int filter_y_stride,
1070                                  int w, int h) {
1071   vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1072     filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1073 }
1074
1075 void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1076                                      uint8_t *dst, ptrdiff_t dst_stride,
1077                                      const int16_t *filter_x,
1078                                      int filter_x_stride,
1079                                      const int16_t *filter_y,
1080                                      int filter_y_stride,
1081                                      int w, int h) {
1082   vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1083     filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1084 }
1085
1086 void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1087                             uint8_t *dst, ptrdiff_t dst_stride,
1088                             const int16_t *filter_x,
1089                             int filter_x_stride,
1090                             const int16_t *filter_y,
1091                             int filter_y_stride,
1092                             int w, int h) {
1093   vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
1094     filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1095 }
1096
1097 void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1098                                 uint8_t *dst, ptrdiff_t dst_stride,
1099                                 const int16_t *filter_x,
1100                                 int filter_x_stride,
1101                                 const int16_t *filter_y,
1102                                 int filter_y_stride,
1103                                 int w, int h) {
1104   vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x,
1105                               filter_x_stride, filter_y, filter_y_stride,
1106                               w, h, 10);
1107 }
1108
1109 void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1110                                   uint8_t *dst, ptrdiff_t dst_stride,
1111                                   const int16_t *filter_x,
1112                                   int filter_x_stride,
1113                                   const int16_t *filter_y,
1114                                   int filter_y_stride,
1115                                   int w, int h) {
1116   vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1117                                 filter_x_stride, filter_y, filter_y_stride,
1118                                 w, h, 12);
1119 }
1120
1121 void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1122                                       uint8_t *dst, ptrdiff_t dst_stride,
1123                                       const int16_t *filter_x,
1124                                       int filter_x_stride,
1125                                       const int16_t *filter_y,
1126                                       int filter_y_stride,
1127                                       int w, int h) {
1128   vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1129                                     filter_x_stride, filter_y, filter_y_stride,
1130                                     w, h, 12);
1131 }
1132
1133 void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1134                                  uint8_t *dst, ptrdiff_t dst_stride,
1135                                  const int16_t *filter_x,
1136                                  int filter_x_stride,
1137                                  const int16_t *filter_y,
1138                                  int filter_y_stride,
1139                                  int w, int h) {
1140   vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1141                                filter_x_stride, filter_y, filter_y_stride,
1142                                w, h, 12);
1143 }
1144
1145 void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1146                                      uint8_t *dst, ptrdiff_t dst_stride,
1147                                      const int16_t *filter_x,
1148                                      int filter_x_stride,
1149                                      const int16_t *filter_y,
1150                                      int filter_y_stride,
1151                                      int w, int h) {
1152   vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1153                                    filter_x_stride, filter_y, filter_y_stride, w, h, 12);
1154 }
1155
1156 void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1157                             uint8_t *dst, ptrdiff_t dst_stride,
1158                             const int16_t *filter_x,
1159                             int filter_x_stride,
1160                             const int16_t *filter_y,
1161                             int filter_y_stride,
1162                             int w, int h) {
1163   vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
1164     filter_x_stride, filter_y, filter_y_stride, w, h, 12);
1165 }
1166
1167 void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1168                                 uint8_t *dst, ptrdiff_t dst_stride,
1169                                 const int16_t *filter_x,
1170                                 int filter_x_stride,
1171                                 const int16_t *filter_y,
1172                                 int filter_y_stride,
1173                                 int w, int h) {
1174   vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x,
1175                               filter_x_stride, filter_y, filter_y_stride, w, h, 12);
1176 }
1177 #endif  // HAVE_SSE2 && ARCH_X86_64
1178
1179 void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
1180                               uint8_t *dst, ptrdiff_t dst_stride,
1181                               const int16_t *filter_x,
1182                               int filter_x_stride,
1183                               const int16_t *filter_y,
1184                               int filter_y_stride,
1185                               int w, int h) {
1186   vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1187                              filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1188 }
1189
1190 void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
1191                                   uint8_t *dst, ptrdiff_t dst_stride,
1192                                   const int16_t *filter_x,
1193                                   int filter_x_stride,
1194                                   const int16_t *filter_y,
1195                                   int filter_y_stride,
1196                                   int w, int h) {
1197   vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1198                                  filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1199 }
1200
1201 void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
1202                              uint8_t *dst, ptrdiff_t dst_stride,
1203                              const int16_t *filter_x,
1204                              int filter_x_stride,
1205                              const int16_t *filter_y,
1206                              int filter_y_stride,
1207                              int w, int h) {
1208   vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
1209                             filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1210 }
1211
1212 void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
1213                                  uint8_t *dst, ptrdiff_t dst_stride,
1214                                  const int16_t *filter_x,
1215                                  int filter_x_stride,
1216                                  const int16_t *filter_y,
1217                                  int filter_y_stride,
1218                                  int w, int h) {
1219   vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
1220                                 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1221 }
1222
1223 void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride,
1224                         uint8_t *dst, ptrdiff_t dst_stride,
1225                         const int16_t *filter_x,
1226                         int filter_x_stride,
1227                         const int16_t *filter_y,
1228                         int filter_y_stride,
1229                         int w, int h) {
1230   vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x,
1231                        filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1232 }
1233
1234 void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
1235                             uint8_t *dst, ptrdiff_t dst_stride,
1236                             const int16_t *filter_x,
1237                             int filter_x_stride,
1238                             const int16_t *filter_y,
1239                             int filter_y_stride,
1240                             int w, int h) {
1241   vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
1242                            filter_x_stride, filter_y, filter_y_stride,
1243                            w, h, 8);
1244 }
1245
1246 void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
1247                                uint8_t *dst, ptrdiff_t dst_stride,
1248                                const int16_t *filter_x,
1249                                int filter_x_stride,
1250                                const int16_t *filter_y,
1251                                int filter_y_stride,
1252                                int w, int h) {
1253   vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1254                              filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1255 }
1256
1257 void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
1258                                    uint8_t *dst, ptrdiff_t dst_stride,
1259                                    const int16_t *filter_x,
1260                                    int filter_x_stride,
1261                                    const int16_t *filter_y,
1262                                    int filter_y_stride,
1263                                    int w, int h) {
1264   vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1265                                  filter_x_stride, filter_y, filter_y_stride,
1266                                  w, h, 10);
1267 }
1268
1269 void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
1270                               uint8_t *dst, ptrdiff_t dst_stride,
1271                               const int16_t *filter_x,
1272                               int filter_x_stride,
1273                               const int16_t *filter_y,
1274                               int filter_y_stride,
1275                               int w, int h) {
1276   vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
1277                             filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1278 }
1279
1280 void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
1281                                   uint8_t *dst, ptrdiff_t dst_stride,
1282                                   const int16_t *filter_x,
1283                                   int filter_x_stride,
1284                                   const int16_t *filter_y,
1285                                   int filter_y_stride,
1286                                   int w, int h) {
1287   vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
1288                                 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1289 }
1290
1291 void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride,
1292                          uint8_t *dst, ptrdiff_t dst_stride,
1293                          const int16_t *filter_x,
1294                          int filter_x_stride,
1295                          const int16_t *filter_y,
1296                          int filter_y_stride,
1297                          int w, int h) {
1298   vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x,
1299     filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1300 }
1301
1302 void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
1303                              uint8_t *dst, ptrdiff_t dst_stride,
1304                              const int16_t *filter_x,
1305                              int filter_x_stride,
1306                              const int16_t *filter_y,
1307                              int filter_y_stride,
1308                              int w, int h) {
1309   vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
1310                            filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1311 }
1312
1313 void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
1314                                uint8_t *dst, ptrdiff_t dst_stride,
1315                                const int16_t *filter_x,
1316                                int filter_x_stride,
1317                                const int16_t *filter_y,
1318                                int filter_y_stride,
1319                                int w, int h) {
1320   vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1321                              filter_x_stride, filter_y, filter_y_stride,
1322                              w, h, 12);
1323 }
1324
1325 void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
1326                                    uint8_t *dst, ptrdiff_t dst_stride,
1327                                    const int16_t *filter_x,
1328                                    int filter_x_stride,
1329                                    const int16_t *filter_y,
1330                                    int filter_y_stride,
1331                                    int w, int h) {
1332   vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1333                                  filter_x_stride, filter_y, filter_y_stride,
1334                                  w, h, 12);
1335 }
1336
1337 void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
1338                               uint8_t *dst, ptrdiff_t dst_stride,
1339                               const int16_t *filter_x,
1340                               int filter_x_stride,
1341                               const int16_t *filter_y,
1342                               int filter_y_stride,
1343                               int w, int h) {
1344   vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
1345                             filter_x_stride, filter_y, filter_y_stride,
1346                             w, h, 12);
1347 }
1348
1349 void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
1350                                   uint8_t *dst, ptrdiff_t dst_stride,
1351                                   const int16_t *filter_x,
1352                                   int filter_x_stride,
1353                                   const int16_t *filter_y,
1354                                   int filter_y_stride,
1355                                   int w, int h) {
1356   vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
1357                                 filter_x_stride, filter_y, filter_y_stride,
1358                                 w, h, 12);
1359 }
1360
1361 void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride,
1362                          uint8_t *dst, ptrdiff_t dst_stride,
1363                          const int16_t *filter_x,
1364                          int filter_x_stride,
1365                          const int16_t *filter_y,
1366                          int filter_y_stride,
1367                          int w, int h) {
1368   vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x,
1369                        filter_x_stride, filter_y, filter_y_stride,
1370                        w, h, 12);
1371 }
1372
1373 void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
1374                              uint8_t *dst, ptrdiff_t dst_stride,
1375                              const int16_t *filter_x,
1376                              int filter_x_stride,
1377                              const int16_t *filter_y,
1378                              int filter_y_stride,
1379                              int w, int h) {
1380   vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
1381                            filter_x_stride, filter_y, filter_y_stride,
1382                            w, h, 12);
1383 }
1384
1385 const ConvolveFunctions convolve8_c(
1386     wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
1387     wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
1388     wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
1389 INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
1390     make_tuple(4, 4, &convolve8_c),
1391     make_tuple(8, 4, &convolve8_c),
1392     make_tuple(4, 8, &convolve8_c),
1393     make_tuple(8, 8, &convolve8_c),
1394     make_tuple(16, 8, &convolve8_c),
1395     make_tuple(8, 16, &convolve8_c),
1396     make_tuple(16, 16, &convolve8_c),
1397     make_tuple(32, 16, &convolve8_c),
1398     make_tuple(16, 32, &convolve8_c),
1399     make_tuple(32, 32, &convolve8_c),
1400     make_tuple(64, 32, &convolve8_c),
1401     make_tuple(32, 64, &convolve8_c),
1402     make_tuple(64, 64, &convolve8_c)));
1403 const ConvolveFunctions convolve10_c(
1404     wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
1405     wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
1406     wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
1407 INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
1408     make_tuple(4, 4, &convolve10_c),
1409     make_tuple(8, 4, &convolve10_c),
1410     make_tuple(4, 8, &convolve10_c),
1411     make_tuple(8, 8, &convolve10_c),
1412     make_tuple(16, 8, &convolve10_c),
1413     make_tuple(8, 16, &convolve10_c),
1414     make_tuple(16, 16, &convolve10_c),
1415     make_tuple(32, 16, &convolve10_c),
1416     make_tuple(16, 32, &convolve10_c),
1417     make_tuple(32, 32, &convolve10_c),
1418     make_tuple(64, 32, &convolve10_c),
1419     make_tuple(32, 64, &convolve10_c),
1420     make_tuple(64, 64, &convolve10_c)));
1421 const ConvolveFunctions convolve12_c(
1422     wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
1423     wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
1424     wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
1425 INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
1426     make_tuple(4, 4, &convolve12_c),
1427     make_tuple(8, 4, &convolve12_c),
1428     make_tuple(4, 8, &convolve12_c),
1429     make_tuple(8, 8, &convolve12_c),
1430     make_tuple(16, 8, &convolve12_c),
1431     make_tuple(8, 16, &convolve12_c),
1432     make_tuple(16, 16, &convolve12_c),
1433     make_tuple(32, 16, &convolve12_c),
1434     make_tuple(16, 32, &convolve12_c),
1435     make_tuple(32, 32, &convolve12_c),
1436     make_tuple(64, 32, &convolve12_c),
1437     make_tuple(32, 64, &convolve12_c),
1438     make_tuple(64, 64, &convolve12_c)));
1439
1440 #else
1441
1442 const ConvolveFunctions convolve8_c(
1443     vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
1444     vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
1445     vp9_convolve8_c, vp9_convolve8_avg_c, 0);
1446
1447 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
1448     make_tuple(4, 4, &convolve8_c),
1449     make_tuple(8, 4, &convolve8_c),
1450     make_tuple(4, 8, &convolve8_c),
1451     make_tuple(8, 8, &convolve8_c),
1452     make_tuple(16, 8, &convolve8_c),
1453     make_tuple(8, 16, &convolve8_c),
1454     make_tuple(16, 16, &convolve8_c),
1455     make_tuple(32, 16, &convolve8_c),
1456     make_tuple(16, 32, &convolve8_c),
1457     make_tuple(32, 32, &convolve8_c),
1458     make_tuple(64, 32, &convolve8_c),
1459     make_tuple(32, 64, &convolve8_c),
1460     make_tuple(64, 64, &convolve8_c)));
1461 #endif
1462
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth configuration: one function table plus one ConvolveTest
// instantiation per bit depth (8, 10, 12), each built from the matching
// wrap_convolve8_*_sse2_<depth> entry points. The ConvolveFunctions
// constructor takes (h8, h8_avg, v8, v8_avg, hv8, hv8_avg, bd), where a
// non-zero bd is stored as the actual bit depth. The two ints in every tuple
// are presumably block width and height -- TODO confirm in the fixture.
const ConvolveFunctions convolve8_sse2(
    wrap_convolve8_horiz_sse2_8,      // h8
    wrap_convolve8_avg_horiz_sse2_8,  // h8_avg
    wrap_convolve8_vert_sse2_8,       // v8
    wrap_convolve8_avg_vert_sse2_8,   // v8_avg
    wrap_convolve8_sse2_8,            // hv8
    wrap_convolve8_avg_sse2_8,        // hv8_avg
    8);                               // bd

INSTANTIATE_TEST_CASE_P(
    SSE2_8, ConvolveTest,
    ::testing::Values(
        make_tuple(4, 4, &convolve8_sse2),
        make_tuple(8, 4, &convolve8_sse2),
        make_tuple(4, 8, &convolve8_sse2),
        make_tuple(8, 8, &convolve8_sse2),
        make_tuple(16, 8, &convolve8_sse2),
        make_tuple(8, 16, &convolve8_sse2),
        make_tuple(16, 16, &convolve8_sse2),
        make_tuple(32, 16, &convolve8_sse2),
        make_tuple(16, 32, &convolve8_sse2),
        make_tuple(32, 32, &convolve8_sse2),
        make_tuple(64, 32, &convolve8_sse2),
        make_tuple(32, 64, &convolve8_sse2),
        make_tuple(64, 64, &convolve8_sse2)));

const ConvolveFunctions convolve10_sse2(
    wrap_convolve8_horiz_sse2_10,      // h8
    wrap_convolve8_avg_horiz_sse2_10,  // h8_avg
    wrap_convolve8_vert_sse2_10,       // v8
    wrap_convolve8_avg_vert_sse2_10,   // v8_avg
    wrap_convolve8_sse2_10,            // hv8
    wrap_convolve8_avg_sse2_10,        // hv8_avg
    10);                               // bd

INSTANTIATE_TEST_CASE_P(
    SSE2_10, ConvolveTest,
    ::testing::Values(
        make_tuple(4, 4, &convolve10_sse2),
        make_tuple(8, 4, &convolve10_sse2),
        make_tuple(4, 8, &convolve10_sse2),
        make_tuple(8, 8, &convolve10_sse2),
        make_tuple(16, 8, &convolve10_sse2),
        make_tuple(8, 16, &convolve10_sse2),
        make_tuple(16, 16, &convolve10_sse2),
        make_tuple(32, 16, &convolve10_sse2),
        make_tuple(16, 32, &convolve10_sse2),
        make_tuple(32, 32, &convolve10_sse2),
        make_tuple(64, 32, &convolve10_sse2),
        make_tuple(32, 64, &convolve10_sse2),
        make_tuple(64, 64, &convolve10_sse2)));

const ConvolveFunctions convolve12_sse2(
    wrap_convolve8_horiz_sse2_12,      // h8
    wrap_convolve8_avg_horiz_sse2_12,  // h8_avg
    wrap_convolve8_vert_sse2_12,       // v8
    wrap_convolve8_avg_vert_sse2_12,   // v8_avg
    wrap_convolve8_sse2_12,            // hv8
    wrap_convolve8_avg_sse2_12,        // hv8_avg
    12);                               // bd

INSTANTIATE_TEST_CASE_P(
    SSE2_12, ConvolveTest,
    ::testing::Values(
        make_tuple(4, 4, &convolve12_sse2),
        make_tuple(8, 4, &convolve12_sse2),
        make_tuple(4, 8, &convolve12_sse2),
        make_tuple(8, 8, &convolve12_sse2),
        make_tuple(16, 8, &convolve12_sse2),
        make_tuple(8, 16, &convolve12_sse2),
        make_tuple(16, 16, &convolve12_sse2),
        make_tuple(32, 16, &convolve12_sse2),
        make_tuple(16, 32, &convolve12_sse2),
        make_tuple(32, 32, &convolve12_sse2),
        make_tuple(64, 32, &convolve12_sse2),
        make_tuple(32, 64, &convolve12_sse2),
        make_tuple(64, 64, &convolve12_sse2)));
#else
// Without CONFIG_VP9_HIGHBITDEPTH the table points straight at the
// vp9_convolve8_*_sse2 functions; bd = 0 marks high bitdepth as not used.
const ConvolveFunctions convolve8_sse2(
    vp9_convolve8_horiz_sse2,      // h8
    vp9_convolve8_avg_horiz_sse2,  // h8_avg
    vp9_convolve8_vert_sse2,       // v8
    vp9_convolve8_avg_vert_sse2,   // v8_avg
    vp9_convolve8_sse2,            // hv8
    vp9_convolve8_avg_sse2,        // hv8_avg
    0);                            // bd

INSTANTIATE_TEST_CASE_P(
    SSE2, ConvolveTest,
    ::testing::Values(
        make_tuple(4, 4, &convolve8_sse2),
        make_tuple(8, 4, &convolve8_sse2),
        make_tuple(4, 8, &convolve8_sse2),
        make_tuple(8, 8, &convolve8_sse2),
        make_tuple(16, 8, &convolve8_sse2),
        make_tuple(8, 16, &convolve8_sse2),
        make_tuple(16, 16, &convolve8_sse2),
        make_tuple(32, 16, &convolve8_sse2),
        make_tuple(16, 32, &convolve8_sse2),
        make_tuple(32, 32, &convolve8_sse2),
        make_tuple(64, 32, &convolve8_sse2),
        make_tuple(32, 64, &convolve8_sse2),
        make_tuple(64, 64, &convolve8_sse2)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_SSE2 && ARCH_X86_64
1541
#if HAVE_SSSE3
// Function table that points at the vp9_convolve8_*_ssse3 functions. The
// ConvolveFunctions constructor takes (h8, h8_avg, v8, v8_avg, hv8, hv8_avg,
// bd); bd = 0 marks the table as not using high bitdepth.
const ConvolveFunctions convolve8_ssse3(
    vp9_convolve8_horiz_ssse3,      // h8
    vp9_convolve8_avg_horiz_ssse3,  // h8_avg
    vp9_convolve8_vert_ssse3,       // v8
    vp9_convolve8_avg_vert_ssse3,   // v8_avg
    vp9_convolve8_ssse3,            // hv8
    vp9_convolve8_avg_ssse3,        // hv8_avg
    0);                             // bd

// Instantiates ConvolveTest against convolve8_ssse3 for every listed pair of
// ints (presumably block width and height -- TODO confirm in the fixture).
INSTANTIATE_TEST_CASE_P(
    SSSE3, ConvolveTest,
    ::testing::Values(
        make_tuple(4, 4, &convolve8_ssse3),
        make_tuple(8, 4, &convolve8_ssse3),
        make_tuple(4, 8, &convolve8_ssse3),
        make_tuple(8, 8, &convolve8_ssse3),
        make_tuple(16, 8, &convolve8_ssse3),
        make_tuple(8, 16, &convolve8_ssse3),
        make_tuple(16, 16, &convolve8_ssse3),
        make_tuple(32, 16, &convolve8_ssse3),
        make_tuple(16, 32, &convolve8_ssse3),
        make_tuple(32, 32, &convolve8_ssse3),
        make_tuple(64, 32, &convolve8_ssse3),
        make_tuple(32, 64, &convolve8_ssse3),
        make_tuple(64, 64, &convolve8_ssse3)));
#endif  // HAVE_SSSE3
1563
#if HAVE_AVX2 && HAVE_SSSE3
// Function table for the AVX2 convolve functions. The ConvolveFunctions
// constructor takes (h8, h8_avg, v8, v8_avg, hv8, hv8_avg, bd); bd = 0 marks
// the table as not using high bitdepth.
//
// Only the three non-averaging slots use *_avx2 functions. The averaging
// slots are filled with the *_ssse3 functions, which is why this section also
// requires HAVE_SSSE3.
const ConvolveFunctions convolve8_avx2(
    vp9_convolve8_horiz_avx2,       // h8
    vp9_convolve8_avg_horiz_ssse3,  // h8_avg (SSSE3)
    vp9_convolve8_vert_avx2,        // v8
    vp9_convolve8_avg_vert_ssse3,   // v8_avg (SSSE3)
    vp9_convolve8_avx2,             // hv8
    vp9_convolve8_avg_ssse3,        // hv8_avg (SSSE3)
    0);                             // bd

// Instantiates ConvolveTest against convolve8_avx2 for every listed pair of
// ints (presumably block width and height -- TODO confirm in the fixture).
// The tuples are kept in the same order as the other ConvolveTest
// instantiations in this file: gtest names each generated test by its
// parameter index, so a shared order lets index N denote the same pair here
// as it does for the C, SSE2, SSSE3, NEON and DSPR2 runs.
INSTANTIATE_TEST_CASE_P(
    AVX2, ConvolveTest,
    ::testing::Values(
        make_tuple(4, 4, &convolve8_avx2),
        make_tuple(8, 4, &convolve8_avx2),
        make_tuple(4, 8, &convolve8_avx2),
        make_tuple(8, 8, &convolve8_avx2),
        make_tuple(16, 8, &convolve8_avx2),
        make_tuple(8, 16, &convolve8_avx2),
        make_tuple(16, 16, &convolve8_avx2),
        make_tuple(32, 16, &convolve8_avx2),
        make_tuple(16, 32, &convolve8_avx2),
        make_tuple(32, 32, &convolve8_avx2),
        make_tuple(64, 32, &convolve8_avx2),
        make_tuple(32, 64, &convolve8_avx2),
        make_tuple(64, 64, &convolve8_avx2)));
#endif  // HAVE_AVX2 && HAVE_SSSE3
1585
#if HAVE_NEON_ASM
// Function table that points at the vp9_convolve8_*_neon functions. The
// ConvolveFunctions constructor takes (h8, h8_avg, v8, v8_avg, hv8, hv8_avg,
// bd); bd = 0 marks the table as not using high bitdepth.
const ConvolveFunctions convolve8_neon(
    vp9_convolve8_horiz_neon,      // h8
    vp9_convolve8_avg_horiz_neon,  // h8_avg
    vp9_convolve8_vert_neon,       // v8
    vp9_convolve8_avg_vert_neon,   // v8_avg
    vp9_convolve8_neon,            // hv8
    vp9_convolve8_avg_neon,        // hv8_avg
    0);                            // bd

// Instantiates ConvolveTest against convolve8_neon for every listed pair of
// ints (presumably block width and height -- TODO confirm in the fixture).
INSTANTIATE_TEST_CASE_P(
    NEON, ConvolveTest,
    ::testing::Values(
        make_tuple(4, 4, &convolve8_neon),
        make_tuple(8, 4, &convolve8_neon),
        make_tuple(4, 8, &convolve8_neon),
        make_tuple(8, 8, &convolve8_neon),
        make_tuple(16, 8, &convolve8_neon),
        make_tuple(8, 16, &convolve8_neon),
        make_tuple(16, 16, &convolve8_neon),
        make_tuple(32, 16, &convolve8_neon),
        make_tuple(16, 32, &convolve8_neon),
        make_tuple(32, 32, &convolve8_neon),
        make_tuple(64, 32, &convolve8_neon),
        make_tuple(32, 64, &convolve8_neon),
        make_tuple(64, 64, &convolve8_neon)));
#endif  // HAVE_NEON_ASM
1607
#if HAVE_DSPR2
// Function table that points at the vp9_convolve8_*_dspr2 functions. The
// ConvolveFunctions constructor takes (h8, h8_avg, v8, v8_avg, hv8, hv8_avg,
// bd); bd = 0 marks the table as not using high bitdepth.
const ConvolveFunctions convolve8_dspr2(
    vp9_convolve8_horiz_dspr2,      // h8
    vp9_convolve8_avg_horiz_dspr2,  // h8_avg
    vp9_convolve8_vert_dspr2,       // v8
    vp9_convolve8_avg_vert_dspr2,   // v8_avg
    vp9_convolve8_dspr2,            // hv8
    vp9_convolve8_avg_dspr2,        // hv8_avg
    0);                             // bd

// Instantiates ConvolveTest against convolve8_dspr2 for every listed pair of
// ints (presumably block width and height -- TODO confirm in the fixture).
INSTANTIATE_TEST_CASE_P(
    DSPR2, ConvolveTest,
    ::testing::Values(
        make_tuple(4, 4, &convolve8_dspr2),
        make_tuple(8, 4, &convolve8_dspr2),
        make_tuple(4, 8, &convolve8_dspr2),
        make_tuple(8, 8, &convolve8_dspr2),
        make_tuple(16, 8, &convolve8_dspr2),
        make_tuple(8, 16, &convolve8_dspr2),
        make_tuple(16, 16, &convolve8_dspr2),
        make_tuple(32, 16, &convolve8_dspr2),
        make_tuple(16, 32, &convolve8_dspr2),
        make_tuple(32, 32, &convolve8_dspr2),
        make_tuple(64, 32, &convolve8_dspr2),
        make_tuple(32, 64, &convolve8_dspr2),
        make_tuple(64, 64, &convolve8_dspr2)));
#endif  // HAVE_DSPR2
1629 }  // namespace