/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <cassert>
#include <cstring>

#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_filter.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
// Largest block dimension (width or height) exercised by these tests.
static const unsigned int kMaxDimension = 64;

// Common signature of every vp9 convolution entry point under test:
// reads the (src, src_stride) image, writes a w x h block to
// (dst, dst_stride); filter_x/filter_y point into 16-entry subpel kernel
// tables stepped through by filter_x_stride/filter_y_stride.
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int filter_x_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h);
33 struct ConvolveFunctions {
34 ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg,
35 ConvolveFunc v8, ConvolveFunc v8_avg,
36 ConvolveFunc hv8, ConvolveFunc hv8_avg,
38 : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg),
39 hv8_avg_(hv8_avg), use_high_bd_(bd) {}
46 ConvolveFunc hv8_avg_;
47 int use_high_bd_; // 0 if high bitdepth not used, else the actual bit depth.
50 typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
// Clamps a filtered value back into the 8-bit pixel range [0, 255].
uint8_t clip_pixel(int x) {
  return (x < 0) ? 0 :
         (x > 255) ? 255 : x;
}
61 void filter_block2d_8_c(const uint8_t *src_ptr,
62 const unsigned int src_stride,
63 const int16_t *HFilter,
64 const int16_t *VFilter,
66 unsigned int dst_stride,
67 unsigned int output_width,
68 unsigned int output_height) {
69 // Between passes, we use an intermediate buffer whose height is extended to
70 // have enough horizontally filtered values as input for the vertical pass.
71 // This buffer is allocated to be big enough for the largest block type we
73 const int kInterp_Extend = 4;
74 const unsigned int intermediate_height =
75 (kInterp_Extend - 1) + output_height + kInterp_Extend;
78 // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
79 // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
83 // and filter_max_width = 16
85 uint8_t intermediate_buffer[71 * kMaxDimension];
86 const int intermediate_next_stride = 1 - intermediate_height * output_width;
88 // Horizontal pass (src -> transposed intermediate).
89 uint8_t *output_ptr = intermediate_buffer;
90 const int src_next_row_stride = src_stride - output_width;
91 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
92 for (i = 0; i < intermediate_height; ++i) {
93 for (j = 0; j < output_width; ++j) {
95 const int temp = (src_ptr[0] * HFilter[0]) +
96 (src_ptr[1] * HFilter[1]) +
97 (src_ptr[2] * HFilter[2]) +
98 (src_ptr[3] * HFilter[3]) +
99 (src_ptr[4] * HFilter[4]) +
100 (src_ptr[5] * HFilter[5]) +
101 (src_ptr[6] * HFilter[6]) +
102 (src_ptr[7] * HFilter[7]) +
103 (VP9_FILTER_WEIGHT >> 1); // Rounding
105 // Normalize back to 0-255...
106 *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
108 output_ptr += intermediate_height;
110 src_ptr += src_next_row_stride;
111 output_ptr += intermediate_next_stride;
114 // Vertical pass (transposed intermediate -> dst).
115 src_ptr = intermediate_buffer;
116 const int dst_next_row_stride = dst_stride - output_width;
117 for (i = 0; i < output_height; ++i) {
118 for (j = 0; j < output_width; ++j) {
120 const int temp = (src_ptr[0] * VFilter[0]) +
121 (src_ptr[1] * VFilter[1]) +
122 (src_ptr[2] * VFilter[2]) +
123 (src_ptr[3] * VFilter[3]) +
124 (src_ptr[4] * VFilter[4]) +
125 (src_ptr[5] * VFilter[5]) +
126 (src_ptr[6] * VFilter[6]) +
127 (src_ptr[7] * VFilter[7]) +
128 (VP9_FILTER_WEIGHT >> 1); // Rounding
130 // Normalize back to 0-255...
131 *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
132 src_ptr += intermediate_height;
134 src_ptr += intermediate_next_stride;
135 dst_ptr += dst_next_row_stride;
// Rounds-to-nearest average of an output_width x output_height block in
// src into output_ptr (in place on the destination), modelling the *_avg_
// convolution variants.
void block2d_average_c(uint8_t *src,
                       unsigned int src_stride,
                       uint8_t *output_ptr,
                       unsigned int output_stride,
                       unsigned int output_width,
                       unsigned int output_height) {
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}
154 void filter_average_block2d_8_c(const uint8_t *src_ptr,
155 const unsigned int src_stride,
156 const int16_t *HFilter,
157 const int16_t *VFilter,
159 unsigned int dst_stride,
160 unsigned int output_width,
161 unsigned int output_height) {
162 uint8_t tmp[kMaxDimension * kMaxDimension];
164 assert(output_width <= kMaxDimension);
165 assert(output_height <= kMaxDimension);
166 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
167 output_width, output_height);
168 block2d_average_c(tmp, 64, dst_ptr, dst_stride,
169 output_width, output_height);
172 #if CONFIG_VP9_HIGHBITDEPTH
173 void high_filter_block2d_8_c(const uint16_t *src_ptr,
174 const unsigned int src_stride,
175 const int16_t *HFilter,
176 const int16_t *VFilter,
178 unsigned int dst_stride,
179 unsigned int output_width,
180 unsigned int output_height,
182 // Between passes, we use an intermediate buffer whose height is extended to
183 // have enough horizontally filtered values as input for the vertical pass.
184 // This buffer is allocated to be big enough for the largest block type we
186 const int kInterp_Extend = 4;
187 const unsigned int intermediate_height =
188 (kInterp_Extend - 1) + output_height + kInterp_Extend;
190 /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
191 * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
195 * and filter_max_width = 16
197 uint16_t intermediate_buffer[71 * kMaxDimension];
198 const int intermediate_next_stride = 1 - intermediate_height * output_width;
200 // Horizontal pass (src -> transposed intermediate).
202 uint16_t *output_ptr = intermediate_buffer;
203 const int src_next_row_stride = src_stride - output_width;
205 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
206 for (i = 0; i < intermediate_height; ++i) {
207 for (j = 0; j < output_width; ++j) {
209 const int temp = (src_ptr[0] * HFilter[0]) +
210 (src_ptr[1] * HFilter[1]) +
211 (src_ptr[2] * HFilter[2]) +
212 (src_ptr[3] * HFilter[3]) +
213 (src_ptr[4] * HFilter[4]) +
214 (src_ptr[5] * HFilter[5]) +
215 (src_ptr[6] * HFilter[6]) +
216 (src_ptr[7] * HFilter[7]) +
217 (VP9_FILTER_WEIGHT >> 1); // Rounding
219 // Normalize back to 0-255...
220 *output_ptr = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
222 output_ptr += intermediate_height;
224 src_ptr += src_next_row_stride;
225 output_ptr += intermediate_next_stride;
229 // Vertical pass (transposed intermediate -> dst).
231 uint16_t *src_ptr = intermediate_buffer;
232 const int dst_next_row_stride = dst_stride - output_width;
234 for (i = 0; i < output_height; ++i) {
235 for (j = 0; j < output_width; ++j) {
237 const int temp = (src_ptr[0] * VFilter[0]) +
238 (src_ptr[1] * VFilter[1]) +
239 (src_ptr[2] * VFilter[2]) +
240 (src_ptr[3] * VFilter[3]) +
241 (src_ptr[4] * VFilter[4]) +
242 (src_ptr[5] * VFilter[5]) +
243 (src_ptr[6] * VFilter[6]) +
244 (src_ptr[7] * VFilter[7]) +
245 (VP9_FILTER_WEIGHT >> 1); // Rounding
247 // Normalize back to 0-255...
248 *dst_ptr++ = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
249 src_ptr += intermediate_height;
251 src_ptr += intermediate_next_stride;
252 dst_ptr += dst_next_row_stride;
// Rounds-to-nearest average for 16-bit pixels; bd is carried for signature
// symmetry with the other high-bitdepth helpers (values are already in range).
void high_block2d_average_c(uint16_t *src,
                            unsigned int src_stride,
                            uint16_t *output_ptr,
                            unsigned int output_stride,
                            unsigned int output_width,
                            unsigned int output_height,
                            int bd) {
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}
273 void high_filter_average_block2d_8_c(const uint16_t *src_ptr,
274 const unsigned int src_stride,
275 const int16_t *HFilter,
276 const int16_t *VFilter,
278 unsigned int dst_stride,
279 unsigned int output_width,
280 unsigned int output_height,
282 uint16_t tmp[kMaxDimension * kMaxDimension];
284 assert(output_width <= kMaxDimension);
285 assert(output_height <= kMaxDimension);
286 high_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
287 output_width, output_height, bd);
288 high_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
289 output_width, output_height, bd);
291 #endif // CONFIG_VP9_HIGHBITDEPTH
293 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
295 static void SetUpTestCase() {
296 // Force input_ to be unaligned, output to be 16 byte aligned.
297 input_ = reinterpret_cast<uint8_t*>(
298 vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
299 output_ = reinterpret_cast<uint8_t*>(
300 vpx_memalign(kDataAlignment, kOutputBufferSize));
301 #if CONFIG_VP9_HIGHBITDEPTH
302 input16_ = reinterpret_cast<uint16_t*>(
303 vpx_memalign(kDataAlignment,
304 (kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
305 output16_ = reinterpret_cast<uint16_t*>(
306 vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
310 static void TearDownTestCase() {
311 vpx_free(input_ - 1);
315 #if CONFIG_VP9_HIGHBITDEPTH
316 vpx_free(input16_ - 1);
324 static const int kDataAlignment = 16;
325 static const int kOuterBlockSize = 256;
326 static const int kInputStride = kOuterBlockSize;
327 static const int kOutputStride = kOuterBlockSize;
328 static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
329 static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
331 int Width() const { return GET_PARAM(0); }
332 int Height() const { return GET_PARAM(1); }
333 int BorderLeft() const {
334 const int center = (kOuterBlockSize - Width()) / 2;
335 return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
337 int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
339 bool IsIndexInBorder(int i) {
340 return (i < BorderTop() * kOuterBlockSize ||
341 i >= (BorderTop() + Height()) * kOuterBlockSize ||
342 i % kOuterBlockSize < BorderLeft() ||
343 i % kOuterBlockSize >= (BorderLeft() + Width()));
346 virtual void SetUp() {
348 #if CONFIG_VP9_HIGHBITDEPTH
349 if (UUT_->use_high_bd_ != 0)
350 mask_ = (1 << UUT_->use_high_bd_) - 1;
354 /* Set up guard blocks for an inner block centered in the outer block */
355 for (int i = 0; i < kOutputBufferSize; ++i) {
356 if (IsIndexInBorder(i))
362 ::libvpx_test::ACMRandom prng;
363 for (int i = 0; i < kInputBufferSize; ++i) {
366 #if CONFIG_VP9_HIGHBITDEPTH
370 input_[i] = prng.Rand8Extremes();
371 #if CONFIG_VP9_HIGHBITDEPTH
372 input16_[i] = prng.Rand16() & mask_;
378 void SetConstantInput(int value) {
379 memset(input_, value, kInputBufferSize);
380 #if CONFIG_VP9_HIGHBITDEPTH
381 vpx_memset16(input16_, value, kInputBufferSize);
385 void CheckGuardBlocks() {
386 for (int i = 0; i < kOutputBufferSize; ++i) {
387 if (IsIndexInBorder(i))
388 EXPECT_EQ(255, output_[i]);
392 uint8_t *input() const {
393 #if CONFIG_VP9_HIGHBITDEPTH
394 if (UUT_->use_high_bd_ == 0) {
395 return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
397 return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize +
401 return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
405 uint8_t *output() const {
406 #if CONFIG_VP9_HIGHBITDEPTH
407 if (UUT_->use_high_bd_ == 0) {
408 return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
410 return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize +
414 return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
418 uint16_t lookup(uint8_t *list, int index) const {
419 #if CONFIG_VP9_HIGHBITDEPTH
420 if (UUT_->use_high_bd_ == 0) {
423 return CONVERT_TO_SHORTPTR(list)[index];
430 void assign_val(uint8_t *list, int index, uint16_t val) const {
431 #if CONFIG_VP9_HIGHBITDEPTH
432 if (UUT_->use_high_bd_ == 0) {
433 list[index] = (uint8_t) val;
435 CONVERT_TO_SHORTPTR(list)[index] = val;
438 list[index] = (uint8_t) val;
442 void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr,
443 const unsigned int src_stride,
444 const int16_t *HFilter,
445 const int16_t *VFilter,
447 unsigned int dst_stride,
448 unsigned int output_width,
449 unsigned int output_height) {
450 #if CONFIG_VP9_HIGHBITDEPTH
451 if (UUT_->use_high_bd_ == 0) {
452 filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
453 dst_ptr, dst_stride, output_width,
456 high_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
458 CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
459 output_width, output_height,
463 filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
464 dst_ptr, dst_stride, output_width,
469 void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
470 const unsigned int src_stride,
471 const int16_t *HFilter,
472 const int16_t *VFilter,
474 unsigned int dst_stride,
475 unsigned int output_width,
476 unsigned int output_height) {
477 #if CONFIG_VP9_HIGHBITDEPTH
478 if (UUT_->use_high_bd_ == 0) {
479 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
480 dst_ptr, dst_stride, output_width, output_height);
482 high_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
484 CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
485 output_width, output_height, UUT_->use_high_bd_);
488 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter,
489 dst_ptr, dst_stride, output_width, output_height);
493 const ConvolveFunctions* UUT_;
494 static uint8_t* input_;
495 static uint8_t* output_;
496 #if CONFIG_VP9_HIGHBITDEPTH
497 static uint16_t* input16_;
498 static uint16_t* output16_;
503 uint8_t* ConvolveTest::input_ = NULL;
504 uint8_t* ConvolveTest::output_ = NULL;
505 #if CONFIG_VP9_HIGHBITDEPTH
506 uint16_t* ConvolveTest::input16_ = NULL;
507 uint16_t* ConvolveTest::output16_ = NULL;
510 TEST_P(ConvolveTest, GuardBlocks) {
514 TEST_P(ConvolveTest, CopyHoriz) {
515 uint8_t* const in = input();
516 uint8_t* const out = output();
517 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
519 ASM_REGISTER_STATE_CHECK(
520 UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
525 for (int y = 0; y < Height(); ++y)
526 for (int x = 0; x < Width(); ++x)
527 ASSERT_EQ(lookup(out, y * kOutputStride + x),
528 lookup(in, y * kInputStride + x))
529 << "(" << x << "," << y << ")";
532 TEST_P(ConvolveTest, CopyVert) {
533 uint8_t* const in = input();
534 uint8_t* const out = output();
535 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
537 ASM_REGISTER_STATE_CHECK(
538 UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
543 for (int y = 0; y < Height(); ++y)
544 for (int x = 0; x < Width(); ++x)
545 ASSERT_EQ(lookup(out, y * kOutputStride + x),
546 lookup(in, y * kInputStride + x))
547 << "(" << x << "," << y << ")";
550 TEST_P(ConvolveTest, Copy2D) {
551 uint8_t* const in = input();
552 uint8_t* const out = output();
553 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
555 ASM_REGISTER_STATE_CHECK(
556 UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
561 for (int y = 0; y < Height(); ++y)
562 for (int x = 0; x < Width(); ++x)
563 ASSERT_EQ(lookup(out, y * kOutputStride + x),
564 lookup(in, y * kInputStride + x))
565 << "(" << x << "," << y << ")";
568 const int kNumFilterBanks = 4;
569 const int kNumFilters = 16;
571 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
572 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
573 const InterpKernel *filters =
574 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
575 for (int i = 0; i < kNumFilters; i++) {
576 const int p0 = filters[i][0] + filters[i][1];
577 const int p1 = filters[i][2] + filters[i][3];
578 const int p2 = filters[i][4] + filters[i][5];
579 const int p3 = filters[i][6] + filters[i][7];
584 EXPECT_LE(p0 + p3, 128);
585 EXPECT_LE(p0 + p3 + p1, 128);
586 EXPECT_LE(p0 + p3 + p1 + p2, 128);
587 EXPECT_EQ(p0 + p1 + p2 + p3, 128);
592 const int16_t kInvalidFilter[8] = { 0 };
594 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
595 uint8_t* const in = input();
596 uint8_t* const out = output();
597 #if CONFIG_VP9_HIGHBITDEPTH
598 uint8_t ref8[kOutputStride * kMaxDimension];
599 uint16_t ref16[kOutputStride * kMaxDimension];
601 if (UUT_->use_high_bd_ == 0) {
604 ref = CONVERT_TO_BYTEPTR(ref16);
607 uint8_t ref[kOutputStride * kMaxDimension];
610 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
611 const InterpKernel *filters =
612 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
613 const InterpKernel *const eighttap_smooth =
614 vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
616 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
617 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
618 wrapper_filter_block2d_8_c(in, kInputStride,
619 filters[filter_x], filters[filter_y],
623 if (filters == eighttap_smooth || (filter_x && filter_y))
624 ASM_REGISTER_STATE_CHECK(
625 UUT_->hv8_(in, kInputStride, out, kOutputStride,
626 filters[filter_x], 16, filters[filter_y], 16,
629 ASM_REGISTER_STATE_CHECK(
630 UUT_->v8_(in, kInputStride, out, kOutputStride,
631 kInvalidFilter, 16, filters[filter_y], 16,
634 ASM_REGISTER_STATE_CHECK(
635 UUT_->h8_(in, kInputStride, out, kOutputStride,
636 filters[filter_x], 16, kInvalidFilter, 16,
641 for (int y = 0; y < Height(); ++y)
642 for (int x = 0; x < Width(); ++x)
643 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
644 lookup(out, y * kOutputStride + x))
645 << "mismatch at (" << x << "," << y << "), "
646 << "filters (" << filter_bank << ","
647 << filter_x << "," << filter_y << ")";
653 TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
654 uint8_t* const in = input();
655 uint8_t* const out = output();
656 #if CONFIG_VP9_HIGHBITDEPTH
657 uint8_t ref8[kOutputStride * kMaxDimension];
658 uint16_t ref16[kOutputStride * kMaxDimension];
660 if (UUT_->use_high_bd_ == 0) {
663 ref = CONVERT_TO_BYTEPTR(ref16);
666 uint8_t ref[kOutputStride * kMaxDimension];
669 // Populate ref and out with some random data
670 ::libvpx_test::ACMRandom prng;
671 for (int y = 0; y < Height(); ++y) {
672 for (int x = 0; x < Width(); ++x) {
674 #if CONFIG_VP9_HIGHBITDEPTH
675 if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) {
676 r = prng.Rand8Extremes();
678 r = prng.Rand16() & mask_;
681 r = prng.Rand8Extremes();
684 assign_val(out, y * kOutputStride + x, r);
685 assign_val(ref, y * kOutputStride + x, r);
689 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
690 const InterpKernel *filters =
691 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
692 const InterpKernel *const eighttap_smooth =
693 vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
695 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
696 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
697 wrapper_filter_average_block2d_8_c(in, kInputStride,
698 filters[filter_x], filters[filter_y],
702 if (filters == eighttap_smooth || (filter_x && filter_y))
703 ASM_REGISTER_STATE_CHECK(
704 UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
705 filters[filter_x], 16, filters[filter_y], 16,
708 ASM_REGISTER_STATE_CHECK(
709 UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
710 filters[filter_x], 16, filters[filter_y], 16,
713 ASM_REGISTER_STATE_CHECK(
714 UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
715 filters[filter_x], 16, filters[filter_y], 16,
720 for (int y = 0; y < Height(); ++y)
721 for (int x = 0; x < Width(); ++x)
722 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
723 lookup(out, y * kOutputStride + x))
724 << "mismatch at (" << x << "," << y << "), "
725 << "filters (" << filter_bank << ","
726 << filter_x << "," << filter_y << ")";
732 TEST_P(ConvolveTest, FilterExtremes) {
733 uint8_t *const in = input();
734 uint8_t *const out = output();
735 #if CONFIG_VP9_HIGHBITDEPTH
736 uint8_t ref8[kOutputStride * kMaxDimension];
737 uint16_t ref16[kOutputStride * kMaxDimension];
739 if (UUT_->use_high_bd_ == 0) {
742 ref = CONVERT_TO_BYTEPTR(ref16);
745 uint8_t ref[kOutputStride * kMaxDimension];
748 // Populate ref and out with some random data
749 ::libvpx_test::ACMRandom prng;
750 for (int y = 0; y < Height(); ++y) {
751 for (int x = 0; x < Width(); ++x) {
753 #if CONFIG_VP9_HIGHBITDEPTH
754 if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) {
755 r = prng.Rand8Extremes();
757 r = prng.Rand16() & mask_;
760 r = prng.Rand8Extremes();
762 assign_val(out, y * kOutputStride + x, r);
763 assign_val(ref, y * kOutputStride + x, r);
767 for (int axis = 0; axis < 2; axis++) {
769 while (seed_val < 256) {
770 for (int y = 0; y < 8; ++y) {
771 for (int x = 0; x < 8; ++x) {
772 #if CONFIG_VP9_HIGHBITDEPTH
773 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
774 ((seed_val >> (axis ? y : x)) & 1) * mask_);
776 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
777 ((seed_val >> (axis ? y : x)) & 1) * 255);
779 if (axis) seed_val++;
786 if (axis) seed_val += 8;
788 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
789 const InterpKernel *filters =
790 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
791 const InterpKernel *const eighttap_smooth =
792 vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
793 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
794 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
795 wrapper_filter_block2d_8_c(in, kInputStride,
796 filters[filter_x], filters[filter_y],
799 if (filters == eighttap_smooth || (filter_x && filter_y))
800 ASM_REGISTER_STATE_CHECK(
801 UUT_->hv8_(in, kInputStride, out, kOutputStride,
802 filters[filter_x], 16, filters[filter_y], 16,
805 ASM_REGISTER_STATE_CHECK(
806 UUT_->v8_(in, kInputStride, out, kOutputStride,
807 kInvalidFilter, 16, filters[filter_y], 16,
810 ASM_REGISTER_STATE_CHECK(
811 UUT_->h8_(in, kInputStride, out, kOutputStride,
812 filters[filter_x], 16, kInvalidFilter, 16,
815 for (int y = 0; y < Height(); ++y)
816 for (int x = 0; x < Width(); ++x)
817 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
818 lookup(out, y * kOutputStride + x))
819 << "mismatch at (" << x << "," << y << "), "
820 << "filters (" << filter_bank << ","
821 << filter_x << "," << filter_y << ")";
829 DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
830 { 0, 0, 0, 0, 0, 0, 0, 128},
831 { 0, 0, 0, 0, 0, 0, 128},
832 { 0, 0, 0, 0, 0, 128},
838 { 0, 0, 0, 0, 0, 0, 0, 128},
839 { 0, 0, 0, 0, 0, 0, 128},
840 { 0, 0, 0, 0, 0, 128},
848 /* This test exercises the horizontal and vertical filter functions. */
849 TEST_P(ConvolveTest, ChangeFilterWorks) {
850 uint8_t* const in = input();
851 uint8_t* const out = output();
853 /* Assume that the first input sample is at the 8/16th position. */
854 const int kInitialSubPelOffset = 8;
856 /* Filters are 8-tap, so the first filter tap will be applied to the pixel
857 * at position -3 with respect to the current filtering position. Since
858 * kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
859 * which is non-zero only in the last tap. So, applying the filter at the
860 * current input position will result in an output equal to the pixel at
861 * offset +4 (-3 + 7) with respect to the current filtering position.
863 const int kPixelSelected = 4;
865 /* Assume that each output pixel requires us to step on by 17/16th pixels in
868 const int kInputPixelStep = 17;
870 /* The filters are setup in such a way that the expected output produces
871 * sets of 8 identical output samples. As the filter position moves to the
872 * next 1/16th pixel position the only active (=128) filter tap moves one
873 * position to the left, resulting in the same input pixel being replicated
874 * in to the output for 8 consecutive samples. After each set of 8 positions
875 * the filters select a different input pixel. kFilterPeriodAdjust below
876 * computes which input pixel is written to the output for a specified
880 /* Test the horizontal filter. */
881 ASM_REGISTER_STATE_CHECK(
882 UUT_->h8_(in, kInputStride, out, kOutputStride,
883 kChangeFilters[kInitialSubPelOffset],
884 kInputPixelStep, NULL, 0, Width(), Height()));
886 for (int x = 0; x < Width(); ++x) {
887 const int kFilterPeriodAdjust = (x >> 3) << 3;
889 kPixelSelected + ((kInitialSubPelOffset
890 + kFilterPeriodAdjust * kInputPixelStep)
892 ASSERT_EQ(lookup(in, ref_x), lookup(out, x))
893 << "x == " << x << "width = " << Width();
896 /* Test the vertical filter. */
897 ASM_REGISTER_STATE_CHECK(
898 UUT_->v8_(in, kInputStride, out, kOutputStride,
899 NULL, 0, kChangeFilters[kInitialSubPelOffset],
900 kInputPixelStep, Width(), Height()));
902 for (int y = 0; y < Height(); ++y) {
903 const int kFilterPeriodAdjust = (y >> 3) << 3;
905 kPixelSelected + ((kInitialSubPelOffset
906 + kFilterPeriodAdjust * kInputPixelStep)
908 ASSERT_EQ(lookup(in, ref_y * kInputStride), lookup(out, y * kInputStride))
912 /* Test the horizontal and vertical filters in combination. */
913 ASM_REGISTER_STATE_CHECK(
914 UUT_->hv8_(in, kInputStride, out, kOutputStride,
915 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
916 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
919 for (int y = 0; y < Height(); ++y) {
920 const int kFilterPeriodAdjustY = (y >> 3) << 3;
922 kPixelSelected + ((kInitialSubPelOffset
923 + kFilterPeriodAdjustY * kInputPixelStep)
925 for (int x = 0; x < Width(); ++x) {
926 const int kFilterPeriodAdjustX = (x >> 3) << 3;
928 kPixelSelected + ((kInitialSubPelOffset
929 + kFilterPeriodAdjustX * kInputPixelStep)
932 ASSERT_EQ(lookup(in, ref_y * kInputStride + ref_x),
933 lookup(out, y * kOutputStride + x))
934 << "x == " << x << ", y == " << y;
939 /* This test exercises that enough rows and columns are filtered with every
940 possible initial fractional positions and scaling steps. */
941 TEST_P(ConvolveTest, CheckScalingFiltering) {
942 uint8_t* const in = input();
943 uint8_t* const out = output();
944 const InterpKernel *const eighttap = vp9_get_interp_kernel(EIGHTTAP);
946 SetConstantInput(127);
948 for (int frac = 0; frac < 16; ++frac) {
949 for (int step = 1; step <= 32; ++step) {
950 /* Test the horizontal and vertical filters in combination. */
951 ASM_REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
952 eighttap[frac], step,
953 eighttap[frac], step,
958 for (int y = 0; y < Height(); ++y) {
959 for (int x = 0; x < Width(); ++x) {
960 ASSERT_EQ(lookup(in, y * kInputStride + x),
961 lookup(out, y * kOutputStride + x))
962 << "x == " << x << ", y == " << y
963 << ", frac == " << frac << ", step == " << step;
970 using std::tr1::make_tuple;
972 #if CONFIG_VP9_HIGHBITDEPTH
973 #if HAVE_SSE2 && ARCH_X86_64
974 void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
975 uint8_t *dst, ptrdiff_t dst_stride,
976 const int16_t *filter_x,
978 const int16_t *filter_y,
981 vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
982 filter_x_stride, filter_y, filter_y_stride,
986 void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
987 uint8_t *dst, ptrdiff_t dst_stride,
988 const int16_t *filter_x,
990 const int16_t *filter_y,
993 vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
994 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
997 void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
998 uint8_t *dst, ptrdiff_t dst_stride,
999 const int16_t *filter_x,
1000 int filter_x_stride,
1001 const int16_t *filter_y,
1002 int filter_y_stride,
1004 vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1005 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1008 void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1009 uint8_t *dst, ptrdiff_t dst_stride,
1010 const int16_t *filter_x,
1011 int filter_x_stride,
1012 const int16_t *filter_y,
1013 int filter_y_stride,
1015 vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1016 filter_x_stride, filter_y, filter_y_stride,
1020 void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1021 uint8_t *dst, ptrdiff_t dst_stride,
1022 const int16_t *filter_x,
1023 int filter_x_stride,
1024 const int16_t *filter_y,
1025 int filter_y_stride,
1027 vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
1028 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1031 void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
1032 uint8_t *dst, ptrdiff_t dst_stride,
1033 const int16_t *filter_x,
1034 int filter_x_stride,
1035 const int16_t *filter_y,
1036 int filter_y_stride,
1038 vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x,
1039 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1042 void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1043 uint8_t *dst, ptrdiff_t dst_stride,
1044 const int16_t *filter_x,
1045 int filter_x_stride,
1046 const int16_t *filter_y,
1047 int filter_y_stride,
1049 vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1050 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1053 void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1054 uint8_t *dst, ptrdiff_t dst_stride,
1055 const int16_t *filter_x,
1056 int filter_x_stride,
1057 const int16_t *filter_y,
1058 int filter_y_stride,
1060 vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1061 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1064 void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1065 uint8_t *dst, ptrdiff_t dst_stride,
1066 const int16_t *filter_x,
1067 int filter_x_stride,
1068 const int16_t *filter_y,
1069 int filter_y_stride,
1071 vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1072 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1075 void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1076 uint8_t *dst, ptrdiff_t dst_stride,
1077 const int16_t *filter_x,
1078 int filter_x_stride,
1079 const int16_t *filter_y,
1080 int filter_y_stride,
1082 vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1083 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1086 void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1087 uint8_t *dst, ptrdiff_t dst_stride,
1088 const int16_t *filter_x,
1089 int filter_x_stride,
1090 const int16_t *filter_y,
1091 int filter_y_stride,
1093 vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
1094 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1097 void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
1098 uint8_t *dst, ptrdiff_t dst_stride,
1099 const int16_t *filter_x,
1100 int filter_x_stride,
1101 const int16_t *filter_y,
1102 int filter_y_stride,
1104 vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x,
1105 filter_x_stride, filter_y, filter_y_stride,
1109 void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1110 uint8_t *dst, ptrdiff_t dst_stride,
1111 const int16_t *filter_x,
1112 int filter_x_stride,
1113 const int16_t *filter_y,
1114 int filter_y_stride,
1116 vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1117 filter_x_stride, filter_y, filter_y_stride,
1121 void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1122 uint8_t *dst, ptrdiff_t dst_stride,
1123 const int16_t *filter_x,
1124 int filter_x_stride,
1125 const int16_t *filter_y,
1126 int filter_y_stride,
1128 vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
1129 filter_x_stride, filter_y, filter_y_stride,
1133 void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1134 uint8_t *dst, ptrdiff_t dst_stride,
1135 const int16_t *filter_x,
1136 int filter_x_stride,
1137 const int16_t *filter_y,
1138 int filter_y_stride,
1140 vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1141 filter_x_stride, filter_y, filter_y_stride,
1145 void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1146 uint8_t *dst, ptrdiff_t dst_stride,
1147 const int16_t *filter_x,
1148 int filter_x_stride,
1149 const int16_t *filter_y,
1150 int filter_y_stride,
1152 vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x,
1153 filter_x_stride, filter_y, filter_y_stride, w, h, 12);
1156 void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1157 uint8_t *dst, ptrdiff_t dst_stride,
1158 const int16_t *filter_x,
1159 int filter_x_stride,
1160 const int16_t *filter_y,
1161 int filter_y_stride,
1163 vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x,
1164 filter_x_stride, filter_y, filter_y_stride, w, h, 12);
1167 void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
1168 uint8_t *dst, ptrdiff_t dst_stride,
1169 const int16_t *filter_x,
1170 int filter_x_stride,
1171 const int16_t *filter_y,
1172 int filter_y_stride,
1174 vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x,
1175 filter_x_stride, filter_y, filter_y_stride, w, h, 12);
1177 #endif // HAVE_SSE2 && ARCH_X86_64
1179 void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
1180 uint8_t *dst, ptrdiff_t dst_stride,
1181 const int16_t *filter_x,
1182 int filter_x_stride,
1183 const int16_t *filter_y,
1184 int filter_y_stride,
1186 vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1187 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1190 void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
1191 uint8_t *dst, ptrdiff_t dst_stride,
1192 const int16_t *filter_x,
1193 int filter_x_stride,
1194 const int16_t *filter_y,
1195 int filter_y_stride,
1197 vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1198 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1201 void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
1202 uint8_t *dst, ptrdiff_t dst_stride,
1203 const int16_t *filter_x,
1204 int filter_x_stride,
1205 const int16_t *filter_y,
1206 int filter_y_stride,
1208 vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
1209 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1212 void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
1213 uint8_t *dst, ptrdiff_t dst_stride,
1214 const int16_t *filter_x,
1215 int filter_x_stride,
1216 const int16_t *filter_y,
1217 int filter_y_stride,
1219 vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
1220 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1223 void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride,
1224 uint8_t *dst, ptrdiff_t dst_stride,
1225 const int16_t *filter_x,
1226 int filter_x_stride,
1227 const int16_t *filter_y,
1228 int filter_y_stride,
1230 vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x,
1231 filter_x_stride, filter_y, filter_y_stride, w, h, 8);
1234 void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
1235 uint8_t *dst, ptrdiff_t dst_stride,
1236 const int16_t *filter_x,
1237 int filter_x_stride,
1238 const int16_t *filter_y,
1239 int filter_y_stride,
1241 vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
1242 filter_x_stride, filter_y, filter_y_stride,
1246 void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
1247 uint8_t *dst, ptrdiff_t dst_stride,
1248 const int16_t *filter_x,
1249 int filter_x_stride,
1250 const int16_t *filter_y,
1251 int filter_y_stride,
1253 vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1254 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1257 void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
1258 uint8_t *dst, ptrdiff_t dst_stride,
1259 const int16_t *filter_x,
1260 int filter_x_stride,
1261 const int16_t *filter_y,
1262 int filter_y_stride,
1264 vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1265 filter_x_stride, filter_y, filter_y_stride,
1269 void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
1270 uint8_t *dst, ptrdiff_t dst_stride,
1271 const int16_t *filter_x,
1272 int filter_x_stride,
1273 const int16_t *filter_y,
1274 int filter_y_stride,
1276 vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
1277 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1280 void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
1281 uint8_t *dst, ptrdiff_t dst_stride,
1282 const int16_t *filter_x,
1283 int filter_x_stride,
1284 const int16_t *filter_y,
1285 int filter_y_stride,
1287 vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
1288 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1291 void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride,
1292 uint8_t *dst, ptrdiff_t dst_stride,
1293 const int16_t *filter_x,
1294 int filter_x_stride,
1295 const int16_t *filter_y,
1296 int filter_y_stride,
1298 vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x,
1299 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1302 void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
1303 uint8_t *dst, ptrdiff_t dst_stride,
1304 const int16_t *filter_x,
1305 int filter_x_stride,
1306 const int16_t *filter_y,
1307 int filter_y_stride,
1309 vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
1310 filter_x_stride, filter_y, filter_y_stride, w, h, 10);
1313 void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
1314 uint8_t *dst, ptrdiff_t dst_stride,
1315 const int16_t *filter_x,
1316 int filter_x_stride,
1317 const int16_t *filter_y,
1318 int filter_y_stride,
1320 vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1321 filter_x_stride, filter_y, filter_y_stride,
1325 void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
1326 uint8_t *dst, ptrdiff_t dst_stride,
1327 const int16_t *filter_x,
1328 int filter_x_stride,
1329 const int16_t *filter_y,
1330 int filter_y_stride,
1332 vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
1333 filter_x_stride, filter_y, filter_y_stride,
1337 void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
1338 uint8_t *dst, ptrdiff_t dst_stride,
1339 const int16_t *filter_x,
1340 int filter_x_stride,
1341 const int16_t *filter_y,
1342 int filter_y_stride,
1344 vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
1345 filter_x_stride, filter_y, filter_y_stride,
1349 void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
1350 uint8_t *dst, ptrdiff_t dst_stride,
1351 const int16_t *filter_x,
1352 int filter_x_stride,
1353 const int16_t *filter_y,
1354 int filter_y_stride,
1356 vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
1357 filter_x_stride, filter_y, filter_y_stride,
1361 void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride,
1362 uint8_t *dst, ptrdiff_t dst_stride,
1363 const int16_t *filter_x,
1364 int filter_x_stride,
1365 const int16_t *filter_y,
1366 int filter_y_stride,
1368 vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x,
1369 filter_x_stride, filter_y, filter_y_stride,
1373 void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
1374 uint8_t *dst, ptrdiff_t dst_stride,
1375 const int16_t *filter_x,
1376 int filter_x_stride,
1377 const int16_t *filter_y,
1378 int filter_y_stride,
1380 vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
1381 filter_x_stride, filter_y, filter_y_stride,
1385 const ConvolveFunctions convolve8_c(
1386 wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
1387 wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
1388 wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
1389 INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
1390 make_tuple(4, 4, &convolve8_c),
1391 make_tuple(8, 4, &convolve8_c),
1392 make_tuple(4, 8, &convolve8_c),
1393 make_tuple(8, 8, &convolve8_c),
1394 make_tuple(16, 8, &convolve8_c),
1395 make_tuple(8, 16, &convolve8_c),
1396 make_tuple(16, 16, &convolve8_c),
1397 make_tuple(32, 16, &convolve8_c),
1398 make_tuple(16, 32, &convolve8_c),
1399 make_tuple(32, 32, &convolve8_c),
1400 make_tuple(64, 32, &convolve8_c),
1401 make_tuple(32, 64, &convolve8_c),
1402 make_tuple(64, 64, &convolve8_c)));
1403 const ConvolveFunctions convolve10_c(
1404 wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
1405 wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
1406 wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
1407 INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
1408 make_tuple(4, 4, &convolve10_c),
1409 make_tuple(8, 4, &convolve10_c),
1410 make_tuple(4, 8, &convolve10_c),
1411 make_tuple(8, 8, &convolve10_c),
1412 make_tuple(16, 8, &convolve10_c),
1413 make_tuple(8, 16, &convolve10_c),
1414 make_tuple(16, 16, &convolve10_c),
1415 make_tuple(32, 16, &convolve10_c),
1416 make_tuple(16, 32, &convolve10_c),
1417 make_tuple(32, 32, &convolve10_c),
1418 make_tuple(64, 32, &convolve10_c),
1419 make_tuple(32, 64, &convolve10_c),
1420 make_tuple(64, 64, &convolve10_c)));
1421 const ConvolveFunctions convolve12_c(
1422 wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
1423 wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
1424 wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
1425 INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
1426 make_tuple(4, 4, &convolve12_c),
1427 make_tuple(8, 4, &convolve12_c),
1428 make_tuple(4, 8, &convolve12_c),
1429 make_tuple(8, 8, &convolve12_c),
1430 make_tuple(16, 8, &convolve12_c),
1431 make_tuple(8, 16, &convolve12_c),
1432 make_tuple(16, 16, &convolve12_c),
1433 make_tuple(32, 16, &convolve12_c),
1434 make_tuple(16, 32, &convolve12_c),
1435 make_tuple(32, 32, &convolve12_c),
1436 make_tuple(64, 32, &convolve12_c),
1437 make_tuple(32, 64, &convolve12_c),
1438 make_tuple(64, 64, &convolve12_c)));
1442 const ConvolveFunctions convolve8_c(
1443 vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
1444 vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
1445 vp9_convolve8_c, vp9_convolve8_avg_c, 0);
1447 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
1448 make_tuple(4, 4, &convolve8_c),
1449 make_tuple(8, 4, &convolve8_c),
1450 make_tuple(4, 8, &convolve8_c),
1451 make_tuple(8, 8, &convolve8_c),
1452 make_tuple(16, 8, &convolve8_c),
1453 make_tuple(8, 16, &convolve8_c),
1454 make_tuple(16, 16, &convolve8_c),
1455 make_tuple(32, 16, &convolve8_c),
1456 make_tuple(16, 32, &convolve8_c),
1457 make_tuple(32, 32, &convolve8_c),
1458 make_tuple(64, 32, &convolve8_c),
1459 make_tuple(32, 64, &convolve8_c),
1460 make_tuple(64, 64, &convolve8_c)));
1463 #if HAVE_SSE2 && ARCH_X86_64
1464 #if CONFIG_VP9_HIGHBITDEPTH
1465 const ConvolveFunctions convolve8_sse2(
1466 wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
1467 wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
1468 wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
1469 INSTANTIATE_TEST_CASE_P(SSE2_8, ConvolveTest, ::testing::Values(
1470 make_tuple(4, 4, &convolve8_sse2),
1471 make_tuple(8, 4, &convolve8_sse2),
1472 make_tuple(4, 8, &convolve8_sse2),
1473 make_tuple(8, 8, &convolve8_sse2),
1474 make_tuple(16, 8, &convolve8_sse2),
1475 make_tuple(8, 16, &convolve8_sse2),
1476 make_tuple(16, 16, &convolve8_sse2),
1477 make_tuple(32, 16, &convolve8_sse2),
1478 make_tuple(16, 32, &convolve8_sse2),
1479 make_tuple(32, 32, &convolve8_sse2),
1480 make_tuple(64, 32, &convolve8_sse2),
1481 make_tuple(32, 64, &convolve8_sse2),
1482 make_tuple(64, 64, &convolve8_sse2)));
1483 const ConvolveFunctions convolve10_sse2(
1484 wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
1485 wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
1486 wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
1487 INSTANTIATE_TEST_CASE_P(SSE2_10, ConvolveTest, ::testing::Values(
1488 make_tuple(4, 4, &convolve10_sse2),
1489 make_tuple(8, 4, &convolve10_sse2),
1490 make_tuple(4, 8, &convolve10_sse2),
1491 make_tuple(8, 8, &convolve10_sse2),
1492 make_tuple(16, 8, &convolve10_sse2),
1493 make_tuple(8, 16, &convolve10_sse2),
1494 make_tuple(16, 16, &convolve10_sse2),
1495 make_tuple(32, 16, &convolve10_sse2),
1496 make_tuple(16, 32, &convolve10_sse2),
1497 make_tuple(32, 32, &convolve10_sse2),
1498 make_tuple(64, 32, &convolve10_sse2),
1499 make_tuple(32, 64, &convolve10_sse2),
1500 make_tuple(64, 64, &convolve10_sse2)));
1501 const ConvolveFunctions convolve12_sse2(
1502 wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
1503 wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
1504 wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
1505 INSTANTIATE_TEST_CASE_P(SSE2_12, ConvolveTest, ::testing::Values(
1506 make_tuple(4, 4, &convolve12_sse2),
1507 make_tuple(8, 4, &convolve12_sse2),
1508 make_tuple(4, 8, &convolve12_sse2),
1509 make_tuple(8, 8, &convolve12_sse2),
1510 make_tuple(16, 8, &convolve12_sse2),
1511 make_tuple(8, 16, &convolve12_sse2),
1512 make_tuple(16, 16, &convolve12_sse2),
1513 make_tuple(32, 16, &convolve12_sse2),
1514 make_tuple(16, 32, &convolve12_sse2),
1515 make_tuple(32, 32, &convolve12_sse2),
1516 make_tuple(64, 32, &convolve12_sse2),
1517 make_tuple(32, 64, &convolve12_sse2),
1518 make_tuple(64, 64, &convolve12_sse2)));
1520 const ConvolveFunctions convolve8_sse2(
1521 vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2,
1522 vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2,
1523 vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0);
1525 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
1526 make_tuple(4, 4, &convolve8_sse2),
1527 make_tuple(8, 4, &convolve8_sse2),
1528 make_tuple(4, 8, &convolve8_sse2),
1529 make_tuple(8, 8, &convolve8_sse2),
1530 make_tuple(16, 8, &convolve8_sse2),
1531 make_tuple(8, 16, &convolve8_sse2),
1532 make_tuple(16, 16, &convolve8_sse2),
1533 make_tuple(32, 16, &convolve8_sse2),
1534 make_tuple(16, 32, &convolve8_sse2),
1535 make_tuple(32, 32, &convolve8_sse2),
1536 make_tuple(64, 32, &convolve8_sse2),
1537 make_tuple(32, 64, &convolve8_sse2),
1538 make_tuple(64, 64, &convolve8_sse2)));
1539 #endif // CONFIG_VP9_HIGHBITDEPTH
1543 const ConvolveFunctions convolve8_ssse3(
1544 vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
1545 vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
1546 vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0);
1548 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
1549 make_tuple(4, 4, &convolve8_ssse3),
1550 make_tuple(8, 4, &convolve8_ssse3),
1551 make_tuple(4, 8, &convolve8_ssse3),
1552 make_tuple(8, 8, &convolve8_ssse3),
1553 make_tuple(16, 8, &convolve8_ssse3),
1554 make_tuple(8, 16, &convolve8_ssse3),
1555 make_tuple(16, 16, &convolve8_ssse3),
1556 make_tuple(32, 16, &convolve8_ssse3),
1557 make_tuple(16, 32, &convolve8_ssse3),
1558 make_tuple(32, 32, &convolve8_ssse3),
1559 make_tuple(64, 32, &convolve8_ssse3),
1560 make_tuple(32, 64, &convolve8_ssse3),
1561 make_tuple(64, 64, &convolve8_ssse3)));
1564 #if HAVE_AVX2 && HAVE_SSSE3
1565 const ConvolveFunctions convolve8_avx2(
1566 vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
1567 vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
1568 vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0);
1570 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
1571 make_tuple(4, 4, &convolve8_avx2),
1572 make_tuple(8, 4, &convolve8_avx2),
1573 make_tuple(4, 8, &convolve8_avx2),
1574 make_tuple(8, 8, &convolve8_avx2),
1575 make_tuple(8, 16, &convolve8_avx2),
1576 make_tuple(16, 8, &convolve8_avx2),
1577 make_tuple(16, 16, &convolve8_avx2),
1578 make_tuple(32, 16, &convolve8_avx2),
1579 make_tuple(16, 32, &convolve8_avx2),
1580 make_tuple(32, 32, &convolve8_avx2),
1581 make_tuple(64, 32, &convolve8_avx2),
1582 make_tuple(32, 64, &convolve8_avx2),
1583 make_tuple(64, 64, &convolve8_avx2)));
1584 #endif // HAVE_AVX2 && HAVE_SSSE3
1587 const ConvolveFunctions convolve8_neon(
1588 vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
1589 vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
1590 vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
1592 INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
1593 make_tuple(4, 4, &convolve8_neon),
1594 make_tuple(8, 4, &convolve8_neon),
1595 make_tuple(4, 8, &convolve8_neon),
1596 make_tuple(8, 8, &convolve8_neon),
1597 make_tuple(16, 8, &convolve8_neon),
1598 make_tuple(8, 16, &convolve8_neon),
1599 make_tuple(16, 16, &convolve8_neon),
1600 make_tuple(32, 16, &convolve8_neon),
1601 make_tuple(16, 32, &convolve8_neon),
1602 make_tuple(32, 32, &convolve8_neon),
1603 make_tuple(64, 32, &convolve8_neon),
1604 make_tuple(32, 64, &convolve8_neon),
1605 make_tuple(64, 64, &convolve8_neon)));
1609 const ConvolveFunctions convolve8_dspr2(
1610 vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2,
1611 vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2,
1612 vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0);
1614 INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
1615 make_tuple(4, 4, &convolve8_dspr2),
1616 make_tuple(8, 4, &convolve8_dspr2),
1617 make_tuple(4, 8, &convolve8_dspr2),
1618 make_tuple(8, 8, &convolve8_dspr2),
1619 make_tuple(16, 8, &convolve8_dspr2),
1620 make_tuple(8, 16, &convolve8_dspr2),
1621 make_tuple(16, 16, &convolve8_dspr2),
1622 make_tuple(32, 16, &convolve8_dspr2),
1623 make_tuple(16, 32, &convolve8_dspr2),
1624 make_tuple(32, 32, &convolve8_dspr2),
1625 make_tuple(64, 32, &convolve8_dspr2),
1626 make_tuple(32, 64, &convolve8_dspr2),
1627 make_tuple(64, 64, &convolve8_dspr2)));