test/dct32x32_test.cc

   1 /*
   2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <math.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14
  15 #include "third_party/googletest/src/include/gtest/gtest.h"
  16 #include "test/acm_random.h"
  17 #include "test/clear_system_state.h"
  18 #include "test/register_state_check.h"
  19 #include "test/util.h"
  20
  21 #include "./vpx_config.h"
  22 #include "./vp9_rtcd.h"
  23 #include "vp9/common/vp9_entropy.h"
  24 #include "vpx/vpx_integer.h"
  25
  26 using libvpx_test::ACMRandom;
  27
  28 namespace {
  29 #ifdef _MSC_VER
  30 static int round(double x) {
  31   if (x < 0)
  32     return static_cast<int>(ceil(x - 0.5));
  33   else
  34     return static_cast<int>(floor(x + 0.5));
  35 }
  36 #endif
  37
  38 const int kNumCoeffs = 1024;
  39 const double kPi = 3.141592653589793238462643383279502884;
  40 void reference_32x32_dct_1d(const double in[32], double out[32], int stride) {
  41   const double kInvSqrt2 = 0.707106781186547524400844362104;
  42   for (int k = 0; k < 32; k++) {
  43     out[k] = 0.0;
  44     for (int n = 0; n < 32; n++)
  45       out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
  46     if (k == 0)
  47       out[k] = out[k] * kInvSqrt2;
  48   }
  49 }
  50
  51 void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
  52                             double output[kNumCoeffs]) {
  53   // First transform columns
  54   for (int i = 0; i < 32; ++i) {
  55     double temp_in[32], temp_out[32];
  56     for (int j = 0; j < 32; ++j)
  57       temp_in[j] = input[j*32 + i];
  58     reference_32x32_dct_1d(temp_in, temp_out, 1);
  59     for (int j = 0; j < 32; ++j)
  60       output[j * 32 + i] = temp_out[j];
  61   }
  62   // Then transform rows
  63   for (int i = 0; i < 32; ++i) {
  64     double temp_in[32], temp_out[32];
  65     for (int j = 0; j < 32; ++j)
  66       temp_in[j] = output[j + i*32];
  67     reference_32x32_dct_1d(temp_in, temp_out, 1);
  68     // Scale by some magic number
  69     for (int j = 0; j < 32; ++j)
  70       output[j + i * 32] = temp_out[j] / 4;
  71   }
  72 }
  73
  74 typedef void (*fwd_txfm_t)(const int16_t *in, int16_t *out, int stride);
  75 typedef void (*inv_txfm_t)(const int16_t *in, uint8_t *out, int stride);
  76
  77 typedef std::tr1::tuple<fwd_txfm_t, inv_txfm_t, int> trans_32x32_param_t;
  78
  79 class Trans32x32Test : public ::testing::TestWithParam<trans_32x32_param_t> {
  80  public:
  81   virtual ~Trans32x32Test() {}
  82   virtual void SetUp() {
  83     fwd_txfm_ = GET_PARAM(0);
  84     inv_txfm_ = GET_PARAM(1);
  85     version_  = GET_PARAM(2);  // 0: high precision forward transform
  86                                // 1: low precision version for rd loop
  87   }
  88
  89   virtual void TearDown() { libvpx_test::ClearSystemState(); }
  90
  91  protected:
  92   int version_;
  93   fwd_txfm_t fwd_txfm_;
  94   inv_txfm_t inv_txfm_;
  95 };
  96
  97 TEST_P(Trans32x32Test, AccuracyCheck) {
  98   ACMRandom rnd(ACMRandom::DeterministicSeed());
  99   uint32_t max_error = 0;
 100   int64_t total_error = 0;
 101   const int count_test_block = 1000;
 102   DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
 103   DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
 104   DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
 105   DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 106
 107   for (int i = 0; i < count_test_block; ++i) {
 108     // Initialize a test block with input range [-255, 255].
 109     for (int j = 0; j < kNumCoeffs; ++j) {
 110       src[j] = rnd.Rand8();
 111       dst[j] = rnd.Rand8();
 112       test_input_block[j] = src[j] - dst[j];
 113     }
 114
 115     REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
 116     REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
 117
 118     for (int j = 0; j < kNumCoeffs; ++j) {
 119       const uint32_t diff = dst[j] - src[j];
 120       const uint32_t error = diff * diff;
 121       if (max_error < error)
 122         max_error = error;
 123       total_error += error;
 124     }
 125   }
 126
 127   if (version_ == 1) {
 128     max_error /= 2;
 129     total_error /= 45;
 130   }
 131
 132   EXPECT_GE(1u, max_error)
 133       << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
 134
 135   EXPECT_GE(count_test_block, total_error)
 136       << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
 137 }
 138
 139 TEST_P(Trans32x32Test, CoeffCheck) {
 140   ACMRandom rnd(ACMRandom::DeterministicSeed());
 141   const int count_test_block = 1000;
 142
 143   DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
 144   DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
 145   DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
 146
 147   for (int i = 0; i < count_test_block; ++i) {
 148     for (int j = 0; j < kNumCoeffs; ++j)
 149       input_block[j] = rnd.Rand8() - rnd.Rand8();
 150
 151     const int stride = 32;
 152     vp9_fdct32x32_c(input_block, output_ref_block, stride);
 153     REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
 154
 155     if (version_ == 0) {
 156       for (int j = 0; j < kNumCoeffs; ++j)
 157         EXPECT_EQ(output_block[j], output_ref_block[j])
 158             << "Error: 32x32 FDCT versions have mismatched coefficients";
 159     } else {
 160       for (int j = 0; j < kNumCoeffs; ++j)
 161         EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
 162             << "Error: 32x32 FDCT rd has mismatched coefficients";
 163     }
 164   }
 165 }
 166
 167 TEST_P(Trans32x32Test, MemCheck) {
 168   ACMRandom rnd(ACMRandom::DeterministicSeed());
 169   const int count_test_block = 2000;
 170
 171   DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
 172   DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
 173   DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
 174   DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
 175
 176   for (int i = 0; i < count_test_block; ++i) {
 177     // Initialize a test block with input range [-255, 255].
 178     for (int j = 0; j < kNumCoeffs; ++j) {
 179       input_block[j] = rnd.Rand8() - rnd.Rand8();
 180       input_extreme_block[j] = rnd.Rand8() & 1 ? 255 : -255;
 181     }
 182     if (i == 0)
 183       for (int j = 0; j < kNumCoeffs; ++j)
 184         input_extreme_block[j] = 255;
 185     if (i == 1)
 186       for (int j = 0; j < kNumCoeffs; ++j)
 187         input_extreme_block[j] = -255;
 188
 189     const int stride = 32;
 190     vp9_fdct32x32_c(input_extreme_block, output_ref_block, stride);
 191     REGISTER_STATE_CHECK(fwd_txfm_(input_extreme_block, output_block, stride));
 192
 193     // The minimum quant value is 4.
 194     for (int j = 0; j < kNumCoeffs; ++j) {
 195       if (version_ == 0) {
 196         EXPECT_EQ(output_block[j], output_ref_block[j])
 197             << "Error: 32x32 FDCT versions have mismatched coefficients";
 198       } else {
 199         EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
 200             << "Error: 32x32 FDCT rd has mismatched coefficients";
 201       }
 202       EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_ref_block[j]))
 203           << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
 204       EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
 205           << "Error: 32x32 FDCT has coefficient larger than "
 206           << "4*DCT_MAX_VALUE";
 207     }
 208   }
 209 }
 210
 211 TEST_P(Trans32x32Test, InverseAccuracy) {
 212   ACMRandom rnd(ACMRandom::DeterministicSeed());
 213   const int count_test_block = 1000;
 214   DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
 215   DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
 216   DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
 217   DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 218
 219   for (int i = 0; i < count_test_block; ++i) {
 220     double out_r[kNumCoeffs];
 221
 222     // Initialize a test block with input range [-255, 255]
 223     for (int j = 0; j < kNumCoeffs; ++j) {
 224       src[j] = rnd.Rand8();
 225       dst[j] = rnd.Rand8();
 226       in[j] = src[j] - dst[j];
 227     }
 228
 229     reference_32x32_dct_2d(in, out_r);
 230     for (int j = 0; j < kNumCoeffs; ++j)
 231       coeff[j] = round(out_r[j]);
 232     REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
 233     for (int j = 0; j < kNumCoeffs; ++j) {
 234       const int diff = dst[j] - src[j];
 235       const int error = diff * diff;
 236       EXPECT_GE(1, error)
 237           << "Error: 32x32 IDCT has error " << error
 238           << " at index " << j;
 239     }
 240   }
 241 }
 242
 243 using std::tr1::make_tuple;
 244
 245 INSTANTIATE_TEST_CASE_P(
 246     C, Trans32x32Test,
 247     ::testing::Values(
 248         make_tuple(&vp9_fdct32x32_c, &vp9_idct32x32_1024_add_c, 0),
 249         make_tuple(&vp9_fdct32x32_rd_c, &vp9_idct32x32_1024_add_c, 1)));
 250
 251 #if HAVE_NEON_ASM
 252 INSTANTIATE_TEST_CASE_P(
 253     NEON, Trans32x32Test,
 254     ::testing::Values(
 255         make_tuple(&vp9_fdct32x32_c,
 256                    &vp9_idct32x32_1024_add_neon, 0),
 257         make_tuple(&vp9_fdct32x32_rd_c,
 258                    &vp9_idct32x32_1024_add_neon, 1)));
 259 #endif
 260
 261 #if HAVE_SSE2
 262 INSTANTIATE_TEST_CASE_P(
 263     SSE2, Trans32x32Test,
 264     ::testing::Values(
 265         make_tuple(&vp9_fdct32x32_sse2,
 266                    &vp9_idct32x32_1024_add_sse2, 0),
 267         make_tuple(&vp9_fdct32x32_rd_sse2,
 268                    &vp9_idct32x32_1024_add_sse2, 1)));
 269 #endif
 270
 271 #if HAVE_AVX2
 272 INSTANTIATE_TEST_CASE_P(
 273     AVX2, Trans32x32Test,
 274     ::testing::Values(
 275         make_tuple(&vp9_fdct32x32_avx2,
 276                    &vp9_idct32x32_1024_add_sse2, 0),
 277         make_tuple(&vp9_fdct32x32_rd_avx2,
 278                    &vp9_idct32x32_1024_add_sse2, 1)));
 279 #endif
 280 }  // namespace