test/fdct8x8_test.cc

   1 /*
   2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <math.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <tuple>
  15
  16 #include "third_party/googletest/src/include/gtest/gtest.h"
  17
  18 #include "./vp9_rtcd.h"
  19 #include "./vpx_dsp_rtcd.h"
  20 #include "test/acm_random.h"
  21 #include "test/clear_system_state.h"
  22 #include "test/register_state_check.h"
  23 #include "test/util.h"
  24 #include "vp9/common/vp9_entropy.h"
  25 #include "vp9/common/vp9_scan.h"
  26 #include "vpx/vpx_codec.h"
  27 #include "vpx/vpx_integer.h"
  28 #include "vpx_ports/mem.h"
  29
  30 using libvpx_test::ACMRandom;
  31
  32 namespace {
  33
  34 const int kNumCoeffs = 64;
  35 const double kPi = 3.141592653589793238462643383279502884;
  36
  37 const int kSignBiasMaxDiff255 = 1500;
  38 const int kSignBiasMaxDiff15 = 10000;
  39
  40 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
  41 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
  42 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
  43                         int tx_type);
  44 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
  45                         int tx_type);
  46
  47 typedef std::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
  48 typedef std::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
  49 typedef std::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
  50
  51 void reference_8x8_dct_1d(const double in[8], double out[8]) {
  52   const double kInvSqrt2 = 0.707106781186547524400844362104;
  53   for (int k = 0; k < 8; k++) {
  54     out[k] = 0.0;
  55     for (int n = 0; n < 8; n++) {
  56       out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0);
  57     }
  58     if (k == 0) out[k] = out[k] * kInvSqrt2;
  59   }
  60 }
  61
  62 void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
  63                           double output[kNumCoeffs]) {
  64   // First transform columns
  65   for (int i = 0; i < 8; ++i) {
  66     double temp_in[8], temp_out[8];
  67     for (int j = 0; j < 8; ++j) temp_in[j] = input[j * 8 + i];
  68     reference_8x8_dct_1d(temp_in, temp_out);
  69     for (int j = 0; j < 8; ++j) output[j * 8 + i] = temp_out[j];
  70   }
  71   // Then transform rows
  72   for (int i = 0; i < 8; ++i) {
  73     double temp_in[8], temp_out[8];
  74     for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i * 8];
  75     reference_8x8_dct_1d(temp_in, temp_out);
  76     // Scale by some magic number
  77     for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j] * 2;
  78   }
  79 }
  80
  81 void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
  82                  int /*tx_type*/) {
  83   vpx_fdct8x8_c(in, out, stride);
  84 }
  85
  86 void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  87   vp9_fht8x8_c(in, out, stride, tx_type);
  88 }
  89
  90 #if CONFIG_VP9_HIGHBITDEPTH
  91 void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
  92   vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
  93 }
  94
  95 void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
  96   vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
  97 }
  98
  99 void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 100   vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
 101 }
 102
 103 void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 104   vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
 105 }
 106
 107 #if HAVE_SSE2
 108
 109 void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
 110   vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
 111 }
 112
 113 void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
 114   vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
 115 }
 116
 117 void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
 118   vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
 119 }
 120
 121 void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
 122   vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
 123 }
 124
 125 void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
 126   vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
 127 }
 128
 129 void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
 130   vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
 131 }
 132 #endif  // HAVE_SSE2
 133 #endif  // CONFIG_VP9_HIGHBITDEPTH
 134
 135 class FwdTrans8x8TestBase {
 136  public:
 137   virtual ~FwdTrans8x8TestBase() {}
 138
 139  protected:
 140   virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
 141   virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
 142
 143   void RunSignBiasCheck() {
 144     ACMRandom rnd(ACMRandom::DeterministicSeed());
 145     DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
 146     DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
 147     int count_sign_block[64][2];
 148     const int count_test_block = 100000;
 149
 150     memset(count_sign_block, 0, sizeof(count_sign_block));
 151
 152     for (int i = 0; i < count_test_block; ++i) {
 153       // Initialize a test block with input range [-255, 255].
 154       for (int j = 0; j < 64; ++j) {
 155         test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
 156                               ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
 157       }
 158       ASM_REGISTER_STATE_CHECK(
 159           RunFwdTxfm(test_input_block, test_output_block, pitch_));
 160
 161       for (int j = 0; j < 64; ++j) {
 162         if (test_output_block[j] < 0) {
 163           ++count_sign_block[j][0];
 164         } else if (test_output_block[j] > 0) {
 165           ++count_sign_block[j][1];
 166         }
 167       }
 168     }
 169
 170     for (int j = 0; j < 64; ++j) {
 171       const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
 172       const int max_diff = kSignBiasMaxDiff255;
 173       EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
 174           << "Error: 8x8 FDCT/FHT has a sign bias > "
 175           << 1. * max_diff / count_test_block * 100 << "%"
 176           << " for input range [-255, 255] at index " << j
 177           << " count0: " << count_sign_block[j][0]
 178           << " count1: " << count_sign_block[j][1] << " diff: " << diff;
 179     }
 180
 181     memset(count_sign_block, 0, sizeof(count_sign_block));
 182
 183     for (int i = 0; i < count_test_block; ++i) {
 184       // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
 185       for (int j = 0; j < 64; ++j) {
 186         test_input_block[j] =
 187             ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4);
 188       }
 189       ASM_REGISTER_STATE_CHECK(
 190           RunFwdTxfm(test_input_block, test_output_block, pitch_));
 191
 192       for (int j = 0; j < 64; ++j) {
 193         if (test_output_block[j] < 0) {
 194           ++count_sign_block[j][0];
 195         } else if (test_output_block[j] > 0) {
 196           ++count_sign_block[j][1];
 197         }
 198       }
 199     }
 200
 201     for (int j = 0; j < 64; ++j) {
 202       const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
 203       const int max_diff = kSignBiasMaxDiff15;
 204       EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
 205           << "Error: 8x8 FDCT/FHT has a sign bias > "
 206           << 1. * max_diff / count_test_block * 100 << "%"
 207           << " for input range [-15, 15] at index " << j
 208           << " count0: " << count_sign_block[j][0]
 209           << " count1: " << count_sign_block[j][1] << " diff: " << diff;
 210     }
 211   }
 212
 213   void RunRoundTripErrorCheck() {
 214     ACMRandom rnd(ACMRandom::DeterministicSeed());
 215     int max_error = 0;
 216     int total_error = 0;
 217     const int count_test_block = 100000;
 218     DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
 219     DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
 220     DECLARE_ALIGNED(16, uint8_t, dst[64]);
 221     DECLARE_ALIGNED(16, uint8_t, src[64]);
 222 #if CONFIG_VP9_HIGHBITDEPTH
 223     DECLARE_ALIGNED(16, uint16_t, dst16[64]);
 224     DECLARE_ALIGNED(16, uint16_t, src16[64]);
 225 #endif
 226
 227     for (int i = 0; i < count_test_block; ++i) {
 228       // Initialize a test block with input range [-mask_, mask_].
 229       for (int j = 0; j < 64; ++j) {
 230         if (bit_depth_ == VPX_BITS_8) {
 231           src[j] = rnd.Rand8();
 232           dst[j] = rnd.Rand8();
 233           test_input_block[j] = src[j] - dst[j];
 234 #if CONFIG_VP9_HIGHBITDEPTH
 235         } else {
 236           src16[j] = rnd.Rand16() & mask_;
 237           dst16[j] = rnd.Rand16() & mask_;
 238           test_input_block[j] = src16[j] - dst16[j];
 239 #endif
 240         }
 241       }
 242
 243       ASM_REGISTER_STATE_CHECK(
 244           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
 245       for (int j = 0; j < 64; ++j) {
 246         if (test_temp_block[j] > 0) {
 247           test_temp_block[j] += 2;
 248           test_temp_block[j] /= 4;
 249           test_temp_block[j] *= 4;
 250         } else {
 251           test_temp_block[j] -= 2;
 252           test_temp_block[j] /= 4;
 253           test_temp_block[j] *= 4;
 254         }
 255       }
 256       if (bit_depth_ == VPX_BITS_8) {
 257         ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
 258 #if CONFIG_VP9_HIGHBITDEPTH
 259       } else {
 260         ASM_REGISTER_STATE_CHECK(
 261             RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
 262 #endif
 263       }
 264
 265       for (int j = 0; j < 64; ++j) {
 266 #if CONFIG_VP9_HIGHBITDEPTH
 267         const int diff =
 268             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 269 #else
 270         const int diff = dst[j] - src[j];
 271 #endif
 272         const int error = diff * diff;
 273         if (max_error < error) max_error = error;
 274         total_error += error;
 275       }
 276     }
 277
 278     EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
 279         << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
 280         << " roundtrip error > 1";
 281
 282     EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
 283         << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
 284         << "error > 1/5 per block";
 285   }
 286
 287   void RunExtremalCheck() {
 288     ACMRandom rnd(ACMRandom::DeterministicSeed());
 289     int max_error = 0;
 290     int total_error = 0;
 291     int total_coeff_error = 0;
 292     const int count_test_block = 100000;
 293     DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
 294     DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
 295     DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
 296     DECLARE_ALIGNED(16, uint8_t, dst[64]);
 297     DECLARE_ALIGNED(16, uint8_t, src[64]);
 298 #if CONFIG_VP9_HIGHBITDEPTH
 299     DECLARE_ALIGNED(16, uint16_t, dst16[64]);
 300     DECLARE_ALIGNED(16, uint16_t, src16[64]);
 301 #endif
 302
 303     for (int i = 0; i < count_test_block; ++i) {
 304       // Initialize a test block with input range [-mask_, mask_].
 305       for (int j = 0; j < 64; ++j) {
 306         if (bit_depth_ == VPX_BITS_8) {
 307           if (i == 0) {
 308             src[j] = 255;
 309             dst[j] = 0;
 310           } else if (i == 1) {
 311             src[j] = 0;
 312             dst[j] = 255;
 313           } else {
 314             src[j] = rnd.Rand8() % 2 ? 255 : 0;
 315             dst[j] = rnd.Rand8() % 2 ? 255 : 0;
 316           }
 317           test_input_block[j] = src[j] - dst[j];
 318 #if CONFIG_VP9_HIGHBITDEPTH
 319         } else {
 320           if (i == 0) {
 321             src16[j] = mask_;
 322             dst16[j] = 0;
 323           } else if (i == 1) {
 324             src16[j] = 0;
 325             dst16[j] = mask_;
 326           } else {
 327             src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
 328             dst16[j] = rnd.Rand8() % 2 ? mask_ : 0;
 329           }
 330           test_input_block[j] = src16[j] - dst16[j];
 331 #endif
 332         }
 333       }
 334
 335       ASM_REGISTER_STATE_CHECK(
 336           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
 337       ASM_REGISTER_STATE_CHECK(
 338           fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
 339       if (bit_depth_ == VPX_BITS_8) {
 340         ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
 341 #if CONFIG_VP9_HIGHBITDEPTH
 342       } else {
 343         ASM_REGISTER_STATE_CHECK(
 344             RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
 345 #endif
 346       }
 347
 348       for (int j = 0; j < 64; ++j) {
 349 #if CONFIG_VP9_HIGHBITDEPTH
 350         const int diff =
 351             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 352 #else
 353         const int diff = dst[j] - src[j];
 354 #endif
 355         const int error = diff * diff;
 356         if (max_error < error) max_error = error;
 357         total_error += error;
 358
 359         const int coeff_diff = test_temp_block[j] - ref_temp_block[j];
 360         total_coeff_error += abs(coeff_diff);
 361       }
 362
 363       EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
 364           << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
 365           << "an individual roundtrip error > 1";
 366
 367       EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
 368           << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
 369           << " roundtrip error > 1/5 per block";
 370
 371       EXPECT_EQ(0, total_coeff_error)
 372           << "Error: Extremal 8x8 FDCT/FHT has"
 373           << "overflow issues in the intermediate steps > 1";
 374     }
 375   }
 376
 377   void RunInvAccuracyCheck() {
 378     ACMRandom rnd(ACMRandom::DeterministicSeed());
 379     const int count_test_block = 1000;
 380     DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
 381     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
 382     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
 383     DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 384 #if CONFIG_VP9_HIGHBITDEPTH
 385     DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 386     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 387 #endif
 388
 389     for (int i = 0; i < count_test_block; ++i) {
 390       double out_r[kNumCoeffs];
 391
 392       // Initialize a test block with input range [-255, 255].
 393       for (int j = 0; j < kNumCoeffs; ++j) {
 394         if (bit_depth_ == VPX_BITS_8) {
 395           src[j] = rnd.Rand8() % 2 ? 255 : 0;
 396           dst[j] = src[j] > 0 ? 0 : 255;
 397           in[j] = src[j] - dst[j];
 398 #if CONFIG_VP9_HIGHBITDEPTH
 399         } else {
 400           src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
 401           dst16[j] = src16[j] > 0 ? 0 : mask_;
 402           in[j] = src16[j] - dst16[j];
 403 #endif
 404         }
 405       }
 406
 407       reference_8x8_dct_2d(in, out_r);
 408       for (int j = 0; j < kNumCoeffs; ++j) {
 409         coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
 410       }
 411
 412       if (bit_depth_ == VPX_BITS_8) {
 413         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
 414 #if CONFIG_VP9_HIGHBITDEPTH
 415       } else {
 416         ASM_REGISTER_STATE_CHECK(
 417             RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
 418 #endif
 419       }
 420
 421       for (int j = 0; j < kNumCoeffs; ++j) {
 422 #if CONFIG_VP9_HIGHBITDEPTH
 423         const int diff =
 424             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 425 #else
 426         const int diff = dst[j] - src[j];
 427 #endif
 428         const uint32_t error = diff * diff;
 429         EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
 430             << "Error: 8x8 IDCT has error " << error << " at index " << j;
 431       }
 432     }
 433   }
 434
 435   void RunFwdAccuracyCheck() {
 436     ACMRandom rnd(ACMRandom::DeterministicSeed());
 437     const int count_test_block = 1000;
 438     DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
 439     DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
 440     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
 441
 442     for (int i = 0; i < count_test_block; ++i) {
 443       double out_r[kNumCoeffs];
 444
 445       // Initialize a test block with input range [-mask_, mask_].
 446       for (int j = 0; j < kNumCoeffs; ++j) {
 447         in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
 448       }
 449
 450       RunFwdTxfm(in, coeff, pitch_);
 451       reference_8x8_dct_2d(in, out_r);
 452       for (int j = 0; j < kNumCoeffs; ++j) {
 453         coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
 454       }
 455
 456       for (int j = 0; j < kNumCoeffs; ++j) {
 457         const int32_t diff = coeff[j] - coeff_r[j];
 458         const uint32_t error = diff * diff;
 459         EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
 460             << "Error: 8x8 DCT has error " << error << " at index " << j;
 461       }
 462     }
 463   }
 464
 465   void CompareInvReference(IdctFunc ref_txfm, int thresh) {
 466     ACMRandom rnd(ACMRandom::DeterministicSeed());
 467     const int count_test_block = 10000;
 468     const int eob = 12;
 469     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
 470     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
 471     DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
 472 #if CONFIG_VP9_HIGHBITDEPTH
 473     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 474     DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
 475 #endif
 476     const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;
 477
 478     for (int i = 0; i < count_test_block; ++i) {
 479       for (int j = 0; j < kNumCoeffs; ++j) {
 480         if (j < eob) {
 481           // Random values less than the threshold, either positive or negative
 482           coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
 483         } else {
 484           coeff[scan[j]] = 0;
 485         }
 486         if (bit_depth_ == VPX_BITS_8) {
 487           dst[j] = 0;
 488           ref[j] = 0;
 489 #if CONFIG_VP9_HIGHBITDEPTH
 490         } else {
 491           dst16[j] = 0;
 492           ref16[j] = 0;
 493 #endif
 494         }
 495       }
 496       if (bit_depth_ == VPX_BITS_8) {
 497         ref_txfm(coeff, ref, pitch_);
 498         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
 499 #if CONFIG_VP9_HIGHBITDEPTH
 500       } else {
 501         ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
 502         ASM_REGISTER_STATE_CHECK(
 503             RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
 504 #endif
 505       }
 506
 507       for (int j = 0; j < kNumCoeffs; ++j) {
 508 #if CONFIG_VP9_HIGHBITDEPTH
 509         const int diff =
 510             bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
 511 #else
 512         const int diff = dst[j] - ref[j];
 513 #endif
 514         const uint32_t error = diff * diff;
 515         EXPECT_EQ(0u, error)
 516             << "Error: 8x8 IDCT has error " << error << " at index " << j;
 517       }
 518     }
 519   }
 520   int pitch_;
 521   int tx_type_;
 522   FhtFunc fwd_txfm_ref;
 523   vpx_bit_depth_t bit_depth_;
 524   int mask_;
 525 };
 526
 527 class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
 528                        public ::testing::TestWithParam<Dct8x8Param> {
 529  public:
 530   virtual ~FwdTrans8x8DCT() {}
 531
 532   virtual void SetUp() {
 533     fwd_txfm_ = GET_PARAM(0);
 534     inv_txfm_ = GET_PARAM(1);
 535     tx_type_ = GET_PARAM(2);
 536     pitch_ = 8;
 537     fwd_txfm_ref = fdct8x8_ref;
 538     bit_depth_ = GET_PARAM(3);
 539     mask_ = (1 << bit_depth_) - 1;
 540   }
 541
 542   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 543
 544  protected:
 545   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
 546     fwd_txfm_(in, out, stride);
 547   }
 548   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
 549     inv_txfm_(out, dst, stride);
 550   }
 551
 552   FdctFunc fwd_txfm_;
 553   IdctFunc inv_txfm_;
 554 };
 555
 556 TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); }
 557
 558 TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }
 559
 560 TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); }
 561
 562 TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) { RunFwdAccuracyCheck(); }
 563
 564 TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
 565
 566 class FwdTrans8x8HT : public FwdTrans8x8TestBase,
 567                       public ::testing::TestWithParam<Ht8x8Param> {
 568  public:
 569   virtual ~FwdTrans8x8HT() {}
 570
 571   virtual void SetUp() {
 572     fwd_txfm_ = GET_PARAM(0);
 573     inv_txfm_ = GET_PARAM(1);
 574     tx_type_ = GET_PARAM(2);
 575     pitch_ = 8;
 576     fwd_txfm_ref = fht8x8_ref;
 577     bit_depth_ = GET_PARAM(3);
 578     mask_ = (1 << bit_depth_) - 1;
 579   }
 580
 581   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 582
 583  protected:
 584   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
 585     fwd_txfm_(in, out, stride, tx_type_);
 586   }
 587   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
 588     inv_txfm_(out, dst, stride, tx_type_);
 589   }
 590
 591   FhtFunc fwd_txfm_;
 592   IhtFunc inv_txfm_;
 593 };
 594
 595 TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); }
 596
 597 TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }
 598
 599 TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); }
 600
 601 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 602 class InvTrans8x8DCT : public FwdTrans8x8TestBase,
 603                        public ::testing::TestWithParam<Idct8x8Param> {
 604  public:
 605   virtual ~InvTrans8x8DCT() {}
 606
 607   virtual void SetUp() {
 608     ref_txfm_ = GET_PARAM(0);
 609     inv_txfm_ = GET_PARAM(1);
 610     thresh_ = GET_PARAM(2);
 611     pitch_ = 8;
 612     bit_depth_ = GET_PARAM(3);
 613     mask_ = (1 << bit_depth_) - 1;
 614   }
 615
 616   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 617
 618  protected:
 619   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
 620     inv_txfm_(out, dst, stride);
 621   }
 622   void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {}
 623
 624   IdctFunc ref_txfm_;
 625   IdctFunc inv_txfm_;
 626   int thresh_;
 627 };
 628 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InvTrans8x8DCT);
 629
 630 TEST_P(InvTrans8x8DCT, CompareReference) {
 631   CompareInvReference(ref_txfm_, thresh_);
 632 }
 633 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 634
 635 using std::make_tuple;
 636
 637 #if CONFIG_VP9_HIGHBITDEPTH
 638 INSTANTIATE_TEST_SUITE_P(
 639     C, FwdTrans8x8DCT,
 640     ::testing::Values(
 641         make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
 642         make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
 643         make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
 644 #else
 645 INSTANTIATE_TEST_SUITE_P(C, FwdTrans8x8DCT,
 646                          ::testing::Values(make_tuple(&vpx_fdct8x8_c,
 647                                                       &vpx_idct8x8_64_add_c, 0,
 648                                                       VPX_BITS_8)));
 649 #endif  // CONFIG_VP9_HIGHBITDEPTH
 650
 651 #if CONFIG_VP9_HIGHBITDEPTH
 652 INSTANTIATE_TEST_SUITE_P(
 653     C, FwdTrans8x8HT,
 654     ::testing::Values(
 655         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
 656         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
 657         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
 658         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
 659         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 3, VPX_BITS_10),
 660         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 0, VPX_BITS_12),
 661         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
 662         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
 663         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
 664         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
 665         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
 666         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
 667 #else
 668 INSTANTIATE_TEST_SUITE_P(
 669     C, FwdTrans8x8HT,
 670     ::testing::Values(
 671         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
 672         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
 673         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
 674         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
 675 #endif  // CONFIG_VP9_HIGHBITDEPTH
 676
 677 #if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
 678 INSTANTIATE_TEST_SUITE_P(NEON, FwdTrans8x8DCT,
 679                          ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
 680                                                       &vpx_idct8x8_64_add_neon,
 681                                                       0, VPX_BITS_8)));
 682
 683 #if !CONFIG_VP9_HIGHBITDEPTH
 684 INSTANTIATE_TEST_SUITE_P(
 685     NEON, FwdTrans8x8HT,
 686     ::testing::Values(
 687         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
 688         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
 689         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
 690         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
 691 #endif  // !CONFIG_VP9_HIGHBITDEPTH
 692 #endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
 693
 694 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 695 INSTANTIATE_TEST_SUITE_P(SSE2, FwdTrans8x8DCT,
 696                          ::testing::Values(make_tuple(&vpx_fdct8x8_sse2,
 697                                                       &vpx_idct8x8_64_add_sse2,
 698                                                       0, VPX_BITS_8)));
 699 INSTANTIATE_TEST_SUITE_P(
 700     SSE2, FwdTrans8x8HT,
 701     ::testing::Values(
 702         make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0, VPX_BITS_8),
 703         make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
 704         make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
 705         make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
 706 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 707
 708 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 709 INSTANTIATE_TEST_SUITE_P(
 710     SSE2, FwdTrans8x8DCT,
 711     ::testing::Values(make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0,
 712                                  VPX_BITS_8),
 713                       make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_10_sse2,
 714                                  12, VPX_BITS_10),
 715                       make_tuple(&vpx_highbd_fdct8x8_sse2,
 716                                  &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
 717                       make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_12_sse2,
 718                                  12, VPX_BITS_12),
 719                       make_tuple(&vpx_highbd_fdct8x8_sse2,
 720                                  &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));
 721
 722 INSTANTIATE_TEST_SUITE_P(
 723     SSE2, FwdTrans8x8HT,
 724     ::testing::Values(
 725         make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
 726         make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
 727         make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
 728         make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
 729
 730 // Optimizations take effect at a threshold of 6201, so we use a value close to
 731 // that to test both branches.
 732 INSTANTIATE_TEST_SUITE_P(
 733     SSE2, InvTrans8x8DCT,
 734     ::testing::Values(
 735         make_tuple(&idct8x8_12_add_10_c, &idct8x8_12_add_10_sse2, 6225,
 736                    VPX_BITS_10),
 737         make_tuple(&idct8x8_10, &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
 738         make_tuple(&idct8x8_12_add_12_c, &idct8x8_12_add_12_sse2, 6225,
 739                    VPX_BITS_12),
 740         make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
 741 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 742
 743 #if HAVE_SSSE3 && VPX_ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
 744     !CONFIG_EMULATE_HARDWARE
 745 INSTANTIATE_TEST_SUITE_P(SSSE3, FwdTrans8x8DCT,
 746                          ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3,
 747                                                       &vpx_idct8x8_64_add_sse2,
 748                                                       0, VPX_BITS_8)));
 749 #endif
 750
 751 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 752 INSTANTIATE_TEST_SUITE_P(MSA, FwdTrans8x8DCT,
 753                          ::testing::Values(make_tuple(&vpx_fdct8x8_msa,
 754                                                       &vpx_idct8x8_64_add_msa,
 755                                                       0, VPX_BITS_8)));
 756 INSTANTIATE_TEST_SUITE_P(
 757     MSA, FwdTrans8x8HT,
 758     ::testing::Values(
 759         make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
 760         make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
 761         make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
 762         make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
 763 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 764
 765 #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 766 INSTANTIATE_TEST_SUITE_P(VSX, FwdTrans8x8DCT,
 767                          ::testing::Values(make_tuple(&vpx_fdct8x8_c,
 768                                                       &vpx_idct8x8_64_add_vsx,
 769                                                       0, VPX_BITS_8)));
 770 #endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 771
 772 #if HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 773 INSTANTIATE_TEST_SUITE_P(LSX, FwdTrans8x8DCT,
 774                          ::testing::Values(make_tuple(&vpx_fdct8x8_lsx,
 775                                                       &vpx_idct8x8_64_add_c, 0,
 776                                                       VPX_BITS_8)));
 777 #endif  // HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 778 }  // namespace