test/dct32x32_test.cc

   1 /*
   2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <math.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14
  15 #include "third_party/googletest/src/include/gtest/gtest.h"
  16 #include "test/acm_random.h"
  17 #include "test/clear_system_state.h"
  18 #include "test/register_state_check.h"
  19 #include "test/util.h"
  20
  21 #include "./vpx_config.h"
  22 #include "./vp9_rtcd.h"
  23 #include "vp9/common/vp9_entropy.h"
  24 #include "vpx/vpx_integer.h"
  25
  26 using libvpx_test::ACMRandom;
  27
  28 namespace {
  29 #ifdef _MSC_VER
  30 static int round(double x) {
  31   if (x < 0)
  32     return static_cast<int>(ceil(x - 0.5));
  33   else
  34     return static_cast<int>(floor(x + 0.5));
  35 }
  36 #endif
  37
  38 const int kNumCoeffs = 1024;
  39 const double kPi = 3.141592653589793238462643383279502884;
  40 void reference_32x32_dct_1d(const double in[32], double out[32]) {
  41   const double kInvSqrt2 = 0.707106781186547524400844362104;
  42   for (int k = 0; k < 32; k++) {
  43     out[k] = 0.0;
  44     for (int n = 0; n < 32; n++)
  45       out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
  46     if (k == 0)
  47       out[k] = out[k] * kInvSqrt2;
  48   }
  49 }
  50
  51 void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
  52                             double output[kNumCoeffs]) {
  53   // First transform columns
  54   for (int i = 0; i < 32; ++i) {
  55     double temp_in[32], temp_out[32];
  56     for (int j = 0; j < 32; ++j)
  57       temp_in[j] = input[j*32 + i];
  58     reference_32x32_dct_1d(temp_in, temp_out);
  59     for (int j = 0; j < 32; ++j)
  60       output[j * 32 + i] = temp_out[j];
  61   }
  62   // Then transform rows
  63   for (int i = 0; i < 32; ++i) {
  64     double temp_in[32], temp_out[32];
  65     for (int j = 0; j < 32; ++j)
  66       temp_in[j] = output[j + i*32];
  67     reference_32x32_dct_1d(temp_in, temp_out);
  68     // Scale by some magic number
  69     for (int j = 0; j < 32; ++j)
  70       output[j + i * 32] = temp_out[j] / 4;
  71   }
  72 }
  73
  74 typedef void (*FwdTxfmFunc)(const int16_t *in, int16_t *out, int stride);
  75 typedef void (*InvTxfmFunc)(const int16_t *in, uint8_t *out, int stride);
  76
  77 typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int> Trans32x32Param;
  78
  79 class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
  80  public:
  81   virtual ~Trans32x32Test() {}
  82   virtual void SetUp() {
  83     fwd_txfm_ = GET_PARAM(0);
  84     inv_txfm_ = GET_PARAM(1);
  85     version_  = GET_PARAM(2);  // 0: high precision forward transform
  86                                // 1: low precision version for rd loop
  87   }
  88
  89   virtual void TearDown() { libvpx_test::ClearSystemState(); }
  90
  91  protected:
  92   int version_;
  93   FwdTxfmFunc fwd_txfm_;
  94   InvTxfmFunc inv_txfm_;
  95 };
  96
  97 TEST_P(Trans32x32Test, AccuracyCheck) {
  98   ACMRandom rnd(ACMRandom::DeterministicSeed());
  99   uint32_t max_error = 0;
 100   int64_t total_error = 0;
 101   const int count_test_block = 1000;
 102   DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
 103   DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
 104   DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
 105   DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 106
 107   for (int i = 0; i < count_test_block; ++i) {
 108     // Initialize a test block with input range [-255, 255].
 109     for (int j = 0; j < kNumCoeffs; ++j) {
 110       src[j] = rnd.Rand8();
 111       dst[j] = rnd.Rand8();
 112       test_input_block[j] = src[j] - dst[j];
 113     }
 114
 115     ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
 116     ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
 117
 118     for (int j = 0; j < kNumCoeffs; ++j) {
 119       const uint32_t diff = dst[j] - src[j];
 120       const uint32_t error = diff * diff;
 121       if (max_error < error)
 122         max_error = error;
 123       total_error += error;
 124     }
 125   }
 126
 127   if (version_ == 1) {
 128     max_error /= 2;
 129     total_error /= 45;
 130   }
 131
 132   EXPECT_GE(1u, max_error)
 133       << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
 134
 135   EXPECT_GE(count_test_block, total_error)
 136       << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
 137 }
 138
 139 TEST_P(Trans32x32Test, CoeffCheck) {
 140   ACMRandom rnd(ACMRandom::DeterministicSeed());
 141   const int count_test_block = 1000;
 142
 143   DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
 144   DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
 145   DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
 146
 147   for (int i = 0; i < count_test_block; ++i) {
 148     for (int j = 0; j < kNumCoeffs; ++j)
 149       input_block[j] = rnd.Rand8() - rnd.Rand8();
 150
 151     const int stride = 32;
 152     vp9_fdct32x32_c(input_block, output_ref_block, stride);
 153     ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
 154
 155     if (version_ == 0) {
 156       for (int j = 0; j < kNumCoeffs; ++j)
 157         EXPECT_EQ(output_block[j], output_ref_block[j])
 158             << "Error: 32x32 FDCT versions have mismatched coefficients";
 159     } else {
 160       for (int j = 0; j < kNumCoeffs; ++j)
 161         EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
 162             << "Error: 32x32 FDCT rd has mismatched coefficients";
 163     }
 164   }
 165 }
 166
 167 TEST_P(Trans32x32Test, MemCheck) {
 168   ACMRandom rnd(ACMRandom::DeterministicSeed());
 169   const int count_test_block = 2000;
 170
 171   DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
 172   DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
 173   DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
 174   DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
 175
 176   for (int i = 0; i < count_test_block; ++i) {
 177     // Initialize a test block with input range [-255, 255].
 178     for (int j = 0; j < kNumCoeffs; ++j) {
 179       input_block[j] = rnd.Rand8() - rnd.Rand8();
 180       input_extreme_block[j] = rnd.Rand8() & 1 ? 255 : -255;
 181     }
 182     if (i == 0) {
 183       for (int j = 0; j < kNumCoeffs; ++j)
 184         input_extreme_block[j] = 255;
 185     } else if (i == 1) {
 186       for (int j = 0; j < kNumCoeffs; ++j)
 187         input_extreme_block[j] = -255;
 188     }
 189
 190     const int stride = 32;
 191     vp9_fdct32x32_c(input_extreme_block, output_ref_block, stride);
 192     ASM_REGISTER_STATE_CHECK(
 193         fwd_txfm_(input_extreme_block, output_block, stride));
 194
 195     // The minimum quant value is 4.
 196     for (int j = 0; j < kNumCoeffs; ++j) {
 197       if (version_ == 0) {
 198         EXPECT_EQ(output_block[j], output_ref_block[j])
 199             << "Error: 32x32 FDCT versions have mismatched coefficients";
 200       } else {
 201         EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
 202             << "Error: 32x32 FDCT rd has mismatched coefficients";
 203       }
 204       EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_ref_block[j]))
 205           << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
 206       EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
 207           << "Error: 32x32 FDCT has coefficient larger than "
 208           << "4*DCT_MAX_VALUE";
 209     }
 210   }
 211 }
 212
 213 TEST_P(Trans32x32Test, InverseAccuracy) {
 214   ACMRandom rnd(ACMRandom::DeterministicSeed());
 215   const int count_test_block = 1000;
 216   DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
 217   DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
 218   DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
 219   DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 220
 221   for (int i = 0; i < count_test_block; ++i) {
 222     double out_r[kNumCoeffs];
 223
 224     // Initialize a test block with input range [-255, 255]
 225     for (int j = 0; j < kNumCoeffs; ++j) {
 226       src[j] = rnd.Rand8();
 227       dst[j] = rnd.Rand8();
 228       in[j] = src[j] - dst[j];
 229     }
 230
 231     reference_32x32_dct_2d(in, out_r);
 232     for (int j = 0; j < kNumCoeffs; ++j)
 233       coeff[j] = round(out_r[j]);
 234     ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
 235     for (int j = 0; j < kNumCoeffs; ++j) {
 236       const int diff = dst[j] - src[j];
 237       const int error = diff * diff;
 238       EXPECT_GE(1, error)
 239           << "Error: 32x32 IDCT has error " << error
 240           << " at index " << j;
 241     }
 242   }
 243 }
 244
 245 using std::tr1::make_tuple;
 246
 247 INSTANTIATE_TEST_CASE_P(
 248     C, Trans32x32Test,
 249     ::testing::Values(
 250         make_tuple(&vp9_fdct32x32_c, &vp9_idct32x32_1024_add_c, 0),
 251         make_tuple(&vp9_fdct32x32_rd_c, &vp9_idct32x32_1024_add_c, 1)));
 252
 253 #if HAVE_NEON_ASM
 254 INSTANTIATE_TEST_CASE_P(
 255     NEON, Trans32x32Test,
 256     ::testing::Values(
 257         make_tuple(&vp9_fdct32x32_c,
 258                    &vp9_idct32x32_1024_add_neon, 0),
 259         make_tuple(&vp9_fdct32x32_rd_c,
 260                    &vp9_idct32x32_1024_add_neon, 1)));
 261 #endif
 262
 263 #if HAVE_SSE2
 264 INSTANTIATE_TEST_CASE_P(
 265     SSE2, Trans32x32Test,
 266     ::testing::Values(
 267         make_tuple(&vp9_fdct32x32_sse2,
 268                    &vp9_idct32x32_1024_add_sse2, 0),
 269         make_tuple(&vp9_fdct32x32_rd_sse2,
 270                    &vp9_idct32x32_1024_add_sse2, 1)));
 271 #endif
 272
 273 #if HAVE_AVX2
 274 INSTANTIATE_TEST_CASE_P(
 275     AVX2, Trans32x32Test,
 276     ::testing::Values(
 277         make_tuple(&vp9_fdct32x32_avx2,
 278                    &vp9_idct32x32_1024_add_sse2, 0),
 279         make_tuple(&vp9_fdct32x32_rd_avx2,
 280                    &vp9_idct32x32_1024_add_sse2, 1)));
 281 #endif
 282 }  // namespace