test/dct32x32_test.cc

   1 /*
   2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <math.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14
  15 #include "third_party/googletest/src/include/gtest/gtest.h"
  16 #include "test/acm_random.h"
  17 #include "test/clear_system_state.h"
  18 #include "test/register_state_check.h"
  19 #include "test/util.h"
  20
  21 #include "./vpx_config.h"
  22 #include "./vp9_rtcd.h"
  23 #include "vp9/common/vp9_entropy.h"
  24 #include "vpx/vpx_codec.h"
  25 #include "vpx/vpx_integer.h"
  26
  27 using libvpx_test::ACMRandom;
  28
  29 namespace {
  30 #ifdef _MSC_VER
  31 static int round(double x) {
  32   if (x < 0)
  33     return static_cast<int>(ceil(x - 0.5));
  34   else
  35     return static_cast<int>(floor(x + 0.5));
  36 }
  37 #endif
  38
  39 const int kNumCoeffs = 1024;
  40 const double kPi = 3.141592653589793238462643383279502884;
  41 void reference_32x32_dct_1d(const double in[32], double out[32]) {
  42   const double kInvSqrt2 = 0.707106781186547524400844362104;
  43   for (int k = 0; k < 32; k++) {
  44     out[k] = 0.0;
  45     for (int n = 0; n < 32; n++)
  46       out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
  47     if (k == 0)
  48       out[k] = out[k] * kInvSqrt2;
  49   }
  50 }
  51
  52 void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
  53                             double output[kNumCoeffs]) {
  54   // First transform columns
  55   for (int i = 0; i < 32; ++i) {
  56     double temp_in[32], temp_out[32];
  57     for (int j = 0; j < 32; ++j)
  58       temp_in[j] = input[j*32 + i];
  59     reference_32x32_dct_1d(temp_in, temp_out);
  60     for (int j = 0; j < 32; ++j)
  61       output[j * 32 + i] = temp_out[j];
  62   }
  63   // Then transform rows
  64   for (int i = 0; i < 32; ++i) {
  65     double temp_in[32], temp_out[32];
  66     for (int j = 0; j < 32; ++j)
  67       temp_in[j] = output[j + i*32];
  68     reference_32x32_dct_1d(temp_in, temp_out);
  69     // Scale by some magic number
  70     for (int j = 0; j < 32; ++j)
  71       output[j + i * 32] = temp_out[j] / 4;
  72   }
  73 }
  74
// Forward transform under test: reads int16_t samples from |in| and writes
// tran_low_t coefficients to |out|; the tests in this file pass 32 as |stride|.
typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
// Inverse transform under test: reads tran_low_t coefficients from |in| and
// updates the pixel buffer |out|; the tests in this file pass 32 as |stride|.
typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);

// Test parameter tuple, in the order read by Trans32x32Test::SetUp():
// (forward transform, inverse transform, version, bit depth).
typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
    Trans32x32Param;
  80
  81 #if CONFIG_VP9_HIGHBITDEPTH
  82 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
  83   vp9_high_idct32x32_1024_add_c(in, out, stride, 10);
  84 }
  85
  86 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  87   vp9_high_idct32x32_1024_add_c(in, out, stride, 12);
  88 }
  89 #endif
  90
  91 class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
  92  public:
  93   virtual ~Trans32x32Test() {}
  94   virtual void SetUp() {
  95     fwd_txfm_ = GET_PARAM(0);
  96     inv_txfm_ = GET_PARAM(1);
  97     version_  = GET_PARAM(2);  // 0: high precision forward transform
  98                                // 1: low precision version for rd loop
  99     bit_depth_ = GET_PARAM(3);
 100     mask_ = (1 << bit_depth_) - 1;
 101   }
 102
 103   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 104
 105  protected:
 106   int version_;
 107   vpx_bit_depth_t bit_depth_;
 108   int mask_;
 109   FwdTxfmFunc fwd_txfm_;
 110   InvTxfmFunc inv_txfm_;
 111 };
 112
 113 TEST_P(Trans32x32Test, AccuracyCheck) {
 114   ACMRandom rnd(ACMRandom::DeterministicSeed());
 115   uint32_t max_error = 0;
 116   int64_t total_error = 0;
 117   const int count_test_block = 1000;
 118   DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
 119   DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
 120   DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
 121   DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 122 #if CONFIG_VP9_HIGHBITDEPTH
 123   DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
 124   DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
 125 #endif
 126
 127   for (int i = 0; i < count_test_block; ++i) {
 128     // Initialize a test block with input range [-mask_, mask_].
 129     for (int j = 0; j < kNumCoeffs; ++j) {
 130       if (bit_depth_ == 8) {
 131         src[j] = rnd.Rand8();
 132         dst[j] = rnd.Rand8();
 133         test_input_block[j] = src[j] - dst[j];
 134 #if CONFIG_VP9_HIGHBITDEPTH
 135       } else {
 136         src16[j] = rnd.Rand16() & mask_;
 137         dst16[j] = rnd.Rand16() & mask_;
 138         test_input_block[j] = src16[j] - dst16[j];
 139 #endif
 140       }
 141     }
 142
 143     ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
 144     if (bit_depth_ == VPX_BITS_8) {
 145       ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
 146 #if CONFIG_VP9_HIGHBITDEPTH
 147     } else {
 148       ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block,
 149                                          CONVERT_TO_BYTEPTR(dst16), 32));
 150 #endif
 151     }
 152
 153     for (int j = 0; j < kNumCoeffs; ++j) {
 154 #if CONFIG_VP9_HIGHBITDEPTH
 155       const uint32_t diff =
 156           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 157 #else
 158       const uint32_t diff = dst[j] - src[j];
 159 #endif
 160       const uint32_t error = diff * diff;
 161       if (max_error < error)
 162         max_error = error;
 163       total_error += error;
 164     }
 165   }
 166
 167   if (version_ == 1) {
 168     max_error /= 2;
 169     total_error /= 45;
 170   }
 171
 172   EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
 173       << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
 174
 175   EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
 176       << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
 177 }
 178
 179 TEST_P(Trans32x32Test, CoeffCheck) {
 180   ACMRandom rnd(ACMRandom::DeterministicSeed());
 181   const int count_test_block = 1000;
 182
 183   DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
 184   DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
 185   DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
 186
 187   for (int i = 0; i < count_test_block; ++i) {
 188     for (int j = 0; j < kNumCoeffs; ++j)
 189       input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
 190
 191     const int stride = 32;
 192     vp9_fdct32x32_c(input_block, output_ref_block, stride);
 193     ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
 194
 195     if (version_ == 0) {
 196       for (int j = 0; j < kNumCoeffs; ++j)
 197         EXPECT_EQ(output_block[j], output_ref_block[j])
 198             << "Error: 32x32 FDCT versions have mismatched coefficients";
 199     } else {
 200       for (int j = 0; j < kNumCoeffs; ++j)
 201         EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
 202             << "Error: 32x32 FDCT rd has mismatched coefficients";
 203     }
 204   }
 205 }
 206
 207 TEST_P(Trans32x32Test, MemCheck) {
 208   ACMRandom rnd(ACMRandom::DeterministicSeed());
 209   const int count_test_block = 2000;
 210
 211   DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
 212   DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
 213   DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
 214   DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
 215
 216   for (int i = 0; i < count_test_block; ++i) {
 217     // Initialize a test block with input range [-mask_, mask_].
 218     for (int j = 0; j < kNumCoeffs; ++j) {
 219       input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
 220       input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
 221     }
 222     if (i == 0) {
 223       for (int j = 0; j < kNumCoeffs; ++j)
 224         input_extreme_block[j] = mask_;
 225     } else if (i == 1) {
 226       for (int j = 0; j < kNumCoeffs; ++j)
 227         input_extreme_block[j] = -mask_;
 228     }
 229
 230     const int stride = 32;
 231     vp9_fdct32x32_c(input_extreme_block, output_ref_block, stride);
 232     ASM_REGISTER_STATE_CHECK(
 233         fwd_txfm_(input_extreme_block, output_block, stride));
 234
 235     // The minimum quant value is 4.
 236     for (int j = 0; j < kNumCoeffs; ++j) {
 237       if (version_ == 0) {
 238         EXPECT_EQ(output_block[j], output_ref_block[j])
 239             << "Error: 32x32 FDCT versions have mismatched coefficients";
 240       } else {
 241         EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
 242             << "Error: 32x32 FDCT rd has mismatched coefficients";
 243       }
 244       EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j]))
 245           << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
 246       EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
 247           << "Error: 32x32 FDCT has coefficient larger than "
 248           << "4*DCT_MAX_VALUE";
 249     }
 250   }
 251 }
 252
 253 TEST_P(Trans32x32Test, InverseAccuracy) {
 254   ACMRandom rnd(ACMRandom::DeterministicSeed());
 255   const int count_test_block = 1000;
 256   DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
 257   DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
 258   DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
 259   DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 260 #if CONFIG_VP9_HIGHBITDEPTH
 261   DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
 262   DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
 263 #endif
 264
 265   for (int i = 0; i < count_test_block; ++i) {
 266     double out_r[kNumCoeffs];
 267
 268     // Initialize a test block with input range [-255, 255]
 269     for (int j = 0; j < kNumCoeffs; ++j) {
 270       if (bit_depth_ == VPX_BITS_8) {
 271         src[j] = rnd.Rand8();
 272         dst[j] = rnd.Rand8();
 273         in[j] = src[j] - dst[j];
 274 #if CONFIG_VP9_HIGHBITDEPTH
 275       } else {
 276         src16[j] = rnd.Rand16() & mask_;
 277         dst16[j] = rnd.Rand16() & mask_;
 278         in[j] = src16[j] - dst16[j];
 279 #endif
 280       }
 281     }
 282
 283     reference_32x32_dct_2d(in, out_r);
 284     for (int j = 0; j < kNumCoeffs; ++j)
 285       coeff[j] = round(out_r[j]);
 286     if (bit_depth_ == VPX_BITS_8) {
 287       ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
 288 #if CONFIG_VP9_HIGHBITDEPTH
 289     } else {
 290       ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
 291 #endif
 292     }
 293     for (int j = 0; j < kNumCoeffs; ++j) {
 294 #if CONFIG_VP9_HIGHBITDEPTH
 295       const int diff =
 296           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 297 #else
 298       const int diff = dst[j] - src[j];
 299 #endif
 300       const int error = diff * diff;
 301       EXPECT_GE(1, error)
 302           << "Error: 32x32 IDCT has error " << error
 303           << " at index " << j;
 304     }
 305   }
 306 }
 307
 308 using std::tr1::make_tuple;
 309
// C implementations. Each tuple is (forward transform, inverse transform,
// version, bit depth); version 0 uses the regular forward transform and
// version 1 the "rd" variant.
#if CONFIG_VP9_HIGHBITDEPTH
// High bit depth builds cover the 10-bit and 12-bit paths in addition to the
// 8-bit path.
INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vp9_high_fdct32x32_c,
                   &idct32x32_10, 0, VPX_BITS_10),
        make_tuple(&vp9_high_fdct32x32_rd_c,
                   &idct32x32_10, 1, VPX_BITS_10),
        make_tuple(&vp9_high_fdct32x32_c,
                   &idct32x32_12, 0, VPX_BITS_12),
        make_tuple(&vp9_high_fdct32x32_rd_c,
                   &idct32x32_12, 1, VPX_BITS_12),
        make_tuple(&vp9_fdct32x32_c,
                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_c,
                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
#else
// Without high bit depth support only the 8-bit path is tested.
INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vp9_fdct32x32_c,
                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_c,
                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
#endif
 335
// NEON: only the inverse transform is a NEON implementation here; it is paired
// with the C forward transforms. Skipped in high bit depth builds.
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    NEON, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vp9_fdct32x32_c,
                   &vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_c,
                   &vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
#endif
 345
// SSE2: both the forward and the inverse transforms are SSE2 implementations.
// Skipped in high bit depth builds.
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vp9_fdct32x32_sse2,
                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_sse2,
                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
#endif
 355
// AVX2: the forward transforms are AVX2 implementations and are paired with
// the SSE2 inverse transform. Skipped in high bit depth builds.
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    AVX2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vp9_fdct32x32_avx2,
                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_avx2,
                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
#endif
 365 }  // namespace