Merge "Ensure the error-concealment code is available"
[platform/upstream/libvpx.git] / test / lpf_8_test.cc
1 /*
2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11 #include <cmath>
12 #include <cstdlib>
13 #include <string>
14
15 #include "third_party/googletest/src/include/gtest/gtest.h"
16 #include "test/acm_random.h"
17 #include "test/clear_system_state.h"
18 #include "test/register_state_check.h"
19 #include "test/util.h"
20
21 #include "./vpx_config.h"
22 #include "./vp9_rtcd.h"
23 #include "vp9/common/vp9_entropy.h"
24 #include "vp9/common/vp9_loopfilter.h"
25 #include "vpx/vpx_integer.h"
26
27 using libvpx_test::ACMRandom;
28
29 namespace {
// Both the horizontal and the vertical filters need a 32x32 buffer:
//   8  coefficients preceding the filtered section
//   16 coefficients within the filtered section
//   8  coefficients following the filtered section
const int kNumCoeffs = 32 * 32;

// Number of randomized blocks that each test case pushes through the filters.
const int number_of_iterations = 10000;
36
#if CONFIG_VP9_HIGHBITDEPTH
// High bit-depth build: samples are 16 bits wide and every filter takes the
// bit depth as its last argument.

// Signature of a filter driven by one set of thresholds.
typedef void (*loop_op_t)(uint16_t *src, int pitch, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh,
                          int count, int bit_depth);

// Signature of a "dual" filter driven by two independent sets of thresholds.
typedef void (*dual_loop_op_t)(uint16_t *src, int pitch,
                               const uint8_t *blimit0, const uint8_t *limit0,
                               const uint8_t *thresh0, const uint8_t *blimit1,
                               const uint8_t *limit1, const uint8_t *thresh1,
                               int bit_depth);
#else
// Regular build: samples are 8 bits wide and no bit depth is passed.

// Signature of a filter driven by one set of thresholds.
typedef void (*loop_op_t)(uint8_t *src, int pitch, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh,
                          int count);

// Signature of a "dual" filter driven by two independent sets of thresholds.
typedef void (*dual_loop_op_t)(uint8_t *src, int pitch,
                               const uint8_t *blimit0, const uint8_t *limit0,
                               const uint8_t *thresh0, const uint8_t *blimit1,
                               const uint8_t *limit1, const uint8_t *thresh1);
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Test parameters: (function under test, reference function, bit depth).
typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
57
#if HAVE_SSE2
// Adapters that give the 16-wide vertical filters the loop_op_t signature.
// The wrapped functions are called without a `count` argument, so each adapter
// accepts that argument and simply drops it.
#if CONFIG_VP9_HIGHBITDEPTH
void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
                              const uint8_t *limit, const uint8_t *thresh,
                              int /* count */, int bd) {
  vp9_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
}

void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh,
                           int /* count */, int bd) {
  vp9_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
}

void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int /* count */, int bd) {
  vp9_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
}

void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit,
                                const uint8_t *limit, const uint8_t *thresh,
                                int /* count */, int bd) {
  vp9_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
}
#else
void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit,
                              const uint8_t *limit, const uint8_t *thresh,
                              int /* count */) {
  vp9_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
}

void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh,
                           int /* count */) {
  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}

void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int /* count */) {
  vp9_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
}

void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
                                const uint8_t *limit, const uint8_t *thresh,
                                int /* count */) {
  vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_SSE2
109
#if HAVE_NEON_ASM
#if CONFIG_VP9_HIGHBITDEPTH
// No neon high bitdepth functions.
#else
// Adapters that give the 16-wide vertical filters the loop_op_t signature.
// The wrapped functions are called without a `count` argument, so each adapter
// accepts that argument and simply drops it.
void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
                              const uint8_t *limit, const uint8_t *thresh,
                              int /* count */) {
  vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
}

void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh,
                           int /* count */) {
  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}

void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int /* count */) {
  vp9_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
}

void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
                                const uint8_t *limit, const uint8_t *thresh,
                                int /* count */) {
  vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_NEON_ASM
139
140 class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
141  public:
142   virtual ~Loop8Test6Param() {}
143   virtual void SetUp() {
144     loopfilter_op_ = GET_PARAM(0);
145     ref_loopfilter_op_ = GET_PARAM(1);
146     bit_depth_ = GET_PARAM(2);
147     mask_ = (1 << bit_depth_) - 1;
148   }
149
150   virtual void TearDown() { libvpx_test::ClearSystemState(); }
151
152  protected:
153   int bit_depth_;
154   int mask_;
155   loop_op_t loopfilter_op_;
156   loop_op_t ref_loopfilter_op_;
157 };
158
159 class Loop8Test9Param : public ::testing::TestWithParam<dualloop8_param_t> {
160  public:
161   virtual ~Loop8Test9Param() {}
162   virtual void SetUp() {
163     loopfilter_op_ = GET_PARAM(0);
164     ref_loopfilter_op_ = GET_PARAM(1);
165     bit_depth_ = GET_PARAM(2);
166     mask_ = (1 << bit_depth_) - 1;
167   }
168
169   virtual void TearDown() { libvpx_test::ClearSystemState(); }
170
171  protected:
172   int bit_depth_;
173   int mask_;
174   dual_loop_op_t loopfilter_op_;
175   dual_loop_op_t ref_loopfilter_op_;
176 };
177
178 TEST_P(Loop8Test6Param, OperationCheck) {
179   ACMRandom rnd(ACMRandom::DeterministicSeed());
180   const int count_test_block = number_of_iterations;
181 #if CONFIG_VP9_HIGHBITDEPTH
182   int32_t bd = bit_depth_;
183   DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
184   DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
185 #else
186   DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
187   DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
188 #endif  // CONFIG_VP9_HIGHBITDEPTH
189   int err_count_total = 0;
190   int first_failure = -1;
191   for (int i = 0; i < count_test_block; ++i) {
192     int err_count = 0;
193     uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
194     DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
195         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
196         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
197     };
198     tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
199     DECLARE_ALIGNED(16, const uint8_t, limit[16])  = {
200         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
201         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
202     };
203     tmp = rnd.Rand8();
204     DECLARE_ALIGNED(16, const uint8_t, thresh[16]) = {
205         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
206         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
207     };
208     int32_t p = kNumCoeffs/32;
209     int count = 1;
210
211     uint16_t tmp_s[kNumCoeffs];
212     int j = 0;
213     while (j < kNumCoeffs) {
214       uint8_t val = rnd.Rand8();
215       if (val & 0x80) {  // 50% chance to choose a new value.
216         tmp_s[j] = rnd.Rand16();
217         j++;
218       } else {  // 50% chance to repeat previous value in row X times
219         int k = 0;
220         while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
221           if (j < 1) {
222             tmp_s[j] = rnd.Rand16();
223           } else if (val & 0x20) {  // Increment by an value within the limit
224             tmp_s[j] = (tmp_s[j - 1] + (*limit - 1));
225           } else {  // Decrement by an value within the limit
226             tmp_s[j] = (tmp_s[j - 1] - (*limit - 1));
227           }
228           j++;
229         }
230       }
231     }
232     for (j = 0; j < kNumCoeffs; j++) {
233       if (i % 2) {
234         s[j] = tmp_s[j] & mask_;
235       } else {
236         s[j] = tmp_s[p * (j % p) + j / p] & mask_;
237       }
238       ref_s[j] = s[j];
239     }
240 #if CONFIG_VP9_HIGHBITDEPTH
241     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count, bd);
242     ASM_REGISTER_STATE_CHECK(
243         loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count, bd));
244 #else
245     ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count);
246     ASM_REGISTER_STATE_CHECK(
247         loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count));
248 #endif  // CONFIG_VP9_HIGHBITDEPTH
249
250     for (int j = 0; j < kNumCoeffs; ++j) {
251       err_count += ref_s[j] != s[j];
252     }
253     if (err_count && !err_count_total) {
254       first_failure = i;
255     }
256     err_count_total += err_count;
257   }
258   EXPECT_EQ(0, err_count_total)
259       << "Error: Loop8Test6Param, C output doesn't match SSE2 "
260          "loopfilter output. "
261       << "First failed at test case " << first_failure;
262 }
263
264 TEST_P(Loop8Test6Param, ValueCheck) {
265   ACMRandom rnd(ACMRandom::DeterministicSeed());
266   const int count_test_block = number_of_iterations;
267 #if CONFIG_VP9_HIGHBITDEPTH
268   const int32_t bd = bit_depth_;
269   DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
270   DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
271 #else
272   DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
273   DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
274 #endif  // CONFIG_VP9_HIGHBITDEPTH
275   int err_count_total = 0;
276   int first_failure = -1;
277
278   // NOTE: The code in vp9_loopfilter.c:update_sharpness computes mblim as a
279   // function of sharpness_lvl and the loopfilter lvl as:
280   // block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
281   // ...
282   // vpx_memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
283   //            SIMD_WIDTH);
284   // This means that the largest value for mblim will occur when sharpness_lvl
285   // is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER).
286   // In this case block_inside_limit will be equal to MAX_LOOP_FILTER and
287   // therefore mblim will be equal to (2 * (lvl + 2) + block_inside_limit) =
288   // 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER = 3 * MAX_LOOP_FILTER + 4
289
290   for (int i = 0; i < count_test_block; ++i) {
291     int err_count = 0;
292     uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
293     DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
294         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
295         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
296     };
297     tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
298     DECLARE_ALIGNED(16, const uint8_t, limit[16])  = {
299         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
300         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
301     };
302     tmp = rnd.Rand8();
303     DECLARE_ALIGNED(16, const uint8_t, thresh[16]) = {
304         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
305         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
306     };
307     int32_t p = kNumCoeffs / 32;
308     int count = 1;
309     for (int j = 0; j < kNumCoeffs; ++j) {
310       s[j] = rnd.Rand16() & mask_;
311       ref_s[j] = s[j];
312     }
313 #if CONFIG_VP9_HIGHBITDEPTH
314     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count, bd);
315     ASM_REGISTER_STATE_CHECK(
316         loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count, bd));
317 #else
318     ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count);
319     ASM_REGISTER_STATE_CHECK(
320         loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count));
321 #endif  // CONFIG_VP9_HIGHBITDEPTH
322     for (int j = 0; j < kNumCoeffs; ++j) {
323       err_count += ref_s[j] != s[j];
324     }
325     if (err_count && !err_count_total) {
326       first_failure = i;
327     }
328     err_count_total += err_count;
329   }
330   EXPECT_EQ(0, err_count_total)
331       << "Error: Loop8Test6Param, C output doesn't match SSE2 "
332          "loopfilter output. "
333       << "First failed at test case " << first_failure;
334 }
335
336 TEST_P(Loop8Test9Param, OperationCheck) {
337   ACMRandom rnd(ACMRandom::DeterministicSeed());
338   const int count_test_block = number_of_iterations;
339 #if CONFIG_VP9_HIGHBITDEPTH
340   const int32_t bd = bit_depth_;
341   DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
342   DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
343 #else
344   DECLARE_ALIGNED_ARRAY(8,  uint8_t,  s, kNumCoeffs);
345   DECLARE_ALIGNED_ARRAY(8,  uint8_t,  ref_s, kNumCoeffs);
346 #endif  // CONFIG_VP9_HIGHBITDEPTH
347   int err_count_total = 0;
348   int first_failure = -1;
349   for (int i = 0; i < count_test_block; ++i) {
350     int err_count = 0;
351     uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
352     DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
353         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
354         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
355     };
356     tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
357     DECLARE_ALIGNED(16, const uint8_t, limit0[16])  = {
358         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
359         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
360     };
361     tmp = rnd.Rand8();
362     DECLARE_ALIGNED(16, const uint8_t, thresh0[16]) = {
363         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
364         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
365     };
366     tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
367     DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
368         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
369         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
370     };
371     tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
372     DECLARE_ALIGNED(16, const uint8_t, limit1[16])  = {
373         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
374         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
375     };
376     tmp = rnd.Rand8();
377     DECLARE_ALIGNED(16, const uint8_t, thresh1[16]) = {
378         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
379         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
380     };
381     int32_t p = kNumCoeffs / 32;
382     uint16_t tmp_s[kNumCoeffs];
383     int j = 0;
384     const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1;
385     while (j < kNumCoeffs) {
386       uint8_t val = rnd.Rand8();
387       if (val & 0x80) {  // 50% chance to choose a new value.
388         tmp_s[j] = rnd.Rand16();
389         j++;
390       } else {  // 50% chance to repeat previous value in row X times.
391         int k = 0;
392         while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
393           if (j < 1) {
394             tmp_s[j] = rnd.Rand16();
395           } else if (val & 0x20) {  // Increment by a value within the limit.
396             tmp_s[j] = (tmp_s[j - 1] + (limit - 1));
397           } else {  // Decrement by an value within the limit.
398             tmp_s[j] = (tmp_s[j - 1] - (limit - 1));
399           }
400           j++;
401         }
402       }
403     }
404     for (j = 0; j < kNumCoeffs; j++) {
405       if (i % 2) {
406         s[j] = tmp_s[j] & mask_;
407       } else {
408         s[j] = tmp_s[p * (j % p) + j / p] & mask_;
409       }
410       ref_s[j] = s[j];
411     }
412 #if CONFIG_VP9_HIGHBITDEPTH
413     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0,
414                        blimit1, limit1, thresh1, bd);
415     ASM_REGISTER_STATE_CHECK(
416         loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0,
417                        blimit1, limit1, thresh1, bd));
418 #else
419     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0,
420                        blimit1, limit1, thresh1);
421     ASM_REGISTER_STATE_CHECK(
422         loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0,
423                        blimit1, limit1, thresh1));
424 #endif  // CONFIG_VP9_HIGHBITDEPTH
425     for (int j = 0; j < kNumCoeffs; ++j) {
426       err_count += ref_s[j] != s[j];
427     }
428     if (err_count && !err_count_total) {
429       first_failure = i;
430     }
431     err_count_total += err_count;
432   }
433   EXPECT_EQ(0, err_count_total)
434       << "Error: Loop8Test9Param, C output doesn't match SSE2 "
435          "loopfilter output. "
436       << "First failed at test case " << first_failure;
437 }
438
439 TEST_P(Loop8Test9Param, ValueCheck) {
440   ACMRandom rnd(ACMRandom::DeterministicSeed());
441   const int count_test_block = number_of_iterations;
442 #if CONFIG_VP9_HIGHBITDEPTH
443   DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
444   DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
445 #else
446   DECLARE_ALIGNED_ARRAY(8,  uint8_t, s, kNumCoeffs);
447   DECLARE_ALIGNED_ARRAY(8,  uint8_t, ref_s, kNumCoeffs);
448 #endif  // CONFIG_VP9_HIGHBITDEPTH
449   int err_count_total = 0;
450   int first_failure = -1;
451   for (int i = 0; i < count_test_block; ++i) {
452     int err_count = 0;
453     uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
454     DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
455         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
456         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
457     };
458     tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
459     DECLARE_ALIGNED(16, const uint8_t, limit0[16])  = {
460         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
461         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
462     };
463     tmp = rnd.Rand8();
464     DECLARE_ALIGNED(16, const uint8_t, thresh0[16]) = {
465         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
466         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
467     };
468     tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
469     DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
470         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
471         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
472     };
473     tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
474     DECLARE_ALIGNED(16, const uint8_t, limit1[16])  = {
475         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
476         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
477     };
478     tmp = rnd.Rand8();
479     DECLARE_ALIGNED(16, const uint8_t, thresh1[16]) = {
480         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
481         tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
482     };
483     int32_t p = kNumCoeffs / 32;  // TODO(pdlf) can we have non-square here?
484     for (int j = 0; j < kNumCoeffs; ++j) {
485       s[j] = rnd.Rand16() & mask_;
486       ref_s[j] = s[j];
487     }
488 #if CONFIG_VP9_HIGHBITDEPTH
489     const int32_t bd = bit_depth_;
490     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0,
491                        blimit1, limit1, thresh1, bd);
492     ASM_REGISTER_STATE_CHECK(
493         loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
494                        thresh0, blimit1, limit1, thresh1, bd));
495 #else
496     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0,
497                        blimit1, limit1, thresh1);
498     ASM_REGISTER_STATE_CHECK(
499         loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0,
500                        blimit1, limit1, thresh1));
501 #endif  // CONFIG_VP9_HIGHBITDEPTH
502     for (int j = 0; j < kNumCoeffs; ++j) {
503       err_count += ref_s[j] != s[j];
504     }
505     if (err_count && !err_count_total) {
506       first_failure = i;
507     }
508     err_count_total += err_count;
509   }
510   EXPECT_EQ(0, err_count_total)
511       << "Error: Loop8Test9Param, C output doesn't match SSE2"
512          "loopfilter output. "
513       << "First failed at test case " << first_failure;
514 }
515
516 using std::tr1::make_tuple;
517
518 #if HAVE_SSE2
519 #if CONFIG_VP9_HIGHBITDEPTH
520 INSTANTIATE_TEST_CASE_P(
521     SSE2, Loop8Test6Param,
522     ::testing::Values(
523         make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
524                    &vp9_highbd_lpf_horizontal_4_c, 8),
525         make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
526                    &vp9_highbd_lpf_vertical_4_c, 8),
527         make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
528                    &vp9_highbd_lpf_horizontal_8_c, 8),
529         make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
530                    &vp9_highbd_lpf_horizontal_16_c, 8),
531         make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
532                    &vp9_highbd_lpf_vertical_8_c, 8),
533         make_tuple(&wrapper_vertical_16_sse2,
534                    &wrapper_vertical_16_c, 8),
535         make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
536                    &vp9_highbd_lpf_horizontal_4_c, 10),
537         make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
538                    &vp9_highbd_lpf_vertical_4_c, 10),
539         make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
540                    &vp9_highbd_lpf_horizontal_8_c, 10),
541         make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
542                    &vp9_highbd_lpf_horizontal_16_c, 10),
543         make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
544                    &vp9_highbd_lpf_vertical_8_c, 10),
545         make_tuple(&wrapper_vertical_16_sse2,
546                    &wrapper_vertical_16_c, 10),
547         make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
548                    &vp9_highbd_lpf_horizontal_4_c, 12),
549         make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
550                    &vp9_highbd_lpf_vertical_4_c, 12),
551         make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
552                    &vp9_highbd_lpf_horizontal_8_c, 12),
553         make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
554                    &vp9_highbd_lpf_horizontal_16_c, 12),
555         make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
556                    &vp9_highbd_lpf_vertical_8_c, 12),
557         make_tuple(&wrapper_vertical_16_sse2,
558                    &wrapper_vertical_16_c, 12),
559         make_tuple(&wrapper_vertical_16_dual_sse2,
560                    &wrapper_vertical_16_dual_c, 8),
561         make_tuple(&wrapper_vertical_16_dual_sse2,
562                    &wrapper_vertical_16_dual_c, 10),
563         make_tuple(&wrapper_vertical_16_dual_sse2,
564                    &wrapper_vertical_16_dual_c, 12)));
565 #else
566 INSTANTIATE_TEST_CASE_P(
567     SSE2, Loop8Test6Param,
568     ::testing::Values(
569         make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8),
570         make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8),
571         make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8),
572         make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8)));
573 #endif  // CONFIG_VP9_HIGHBITDEPTH
574 #endif
575
// The AVX2 filter is only instantiated for the regular (8-bit) build.
#if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
INSTANTIATE_TEST_CASE_P(
    AVX2, Loop8Test6Param,
    ::testing::Values(
        make_tuple(&vp9_lpf_horizontal_16_avx2,
                   &vp9_lpf_horizontal_16_c, 8)));
#endif  // HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
582
// Each entry is (function under test, reference function, bit depth).
#if HAVE_SSE2
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    SSE2, Loop8Test9Param,
    ::testing::Values(
        // Bit depth 8.
        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
                   &vp9_highbd_lpf_horizontal_4_dual_c, 8),
        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
                   &vp9_highbd_lpf_horizontal_8_dual_c, 8),
        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
                   &vp9_highbd_lpf_vertical_4_dual_c, 8),
        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
                   &vp9_highbd_lpf_vertical_8_dual_c, 8),
        // Bit depth 10.
        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
                   &vp9_highbd_lpf_horizontal_4_dual_c, 10),
        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
                   &vp9_highbd_lpf_horizontal_8_dual_c, 10),
        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
                   &vp9_highbd_lpf_vertical_4_dual_c, 10),
        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
                   &vp9_highbd_lpf_vertical_8_dual_c, 10),
        // Bit depth 12.
        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
                   &vp9_highbd_lpf_horizontal_4_dual_c, 12),
        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
                   &vp9_highbd_lpf_horizontal_8_dual_c, 12),
        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
                   &vp9_highbd_lpf_vertical_4_dual_c, 12),
        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
                   &vp9_highbd_lpf_vertical_8_dual_c, 12)));
#else
INSTANTIATE_TEST_CASE_P(
    SSE2, Loop8Test9Param,
    ::testing::Values(
        make_tuple(&vp9_lpf_horizontal_4_dual_sse2,
                   &vp9_lpf_horizontal_4_dual_c, 8),
        make_tuple(&vp9_lpf_horizontal_8_dual_sse2,
                   &vp9_lpf_horizontal_8_dual_c, 8),
        make_tuple(&vp9_lpf_vertical_4_dual_sse2,
                   &vp9_lpf_vertical_4_dual_c, 8),
        make_tuple(&vp9_lpf_vertical_8_dual_sse2,
                   &vp9_lpf_vertical_8_dual_c, 8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_SSE2
626
// Each entry is (function under test, reference function, bit depth).
#if HAVE_NEON
#if CONFIG_VP9_HIGHBITDEPTH
// No neon high bitdepth functions.
#else
// Using #if inside the macro is unsupported on MSVS but the tests are not
// currently built for MSVS with ARM and NEON.
INSTANTIATE_TEST_CASE_P(
    NEON, Loop8Test6Param,
    ::testing::Values(
#if HAVE_NEON_ASM
        make_tuple(&vp9_lpf_horizontal_16_neon, &vp9_lpf_horizontal_16_c, 8),
        make_tuple(&wrapper_vertical_16_neon, &wrapper_vertical_16_c, 8),
        make_tuple(&wrapper_vertical_16_dual_neon,
                   &wrapper_vertical_16_dual_c, 8),
        make_tuple(&vp9_lpf_horizontal_8_neon, &vp9_lpf_horizontal_8_c, 8),
        make_tuple(&vp9_lpf_vertical_8_neon, &vp9_lpf_vertical_8_c, 8),
#endif  // HAVE_NEON_ASM
        make_tuple(&vp9_lpf_horizontal_4_neon, &vp9_lpf_horizontal_4_c, 8),
        make_tuple(&vp9_lpf_vertical_4_neon, &vp9_lpf_vertical_4_c, 8)));

INSTANTIATE_TEST_CASE_P(
    NEON, Loop8Test9Param,
    ::testing::Values(
#if HAVE_NEON_ASM
        make_tuple(&vp9_lpf_horizontal_8_dual_neon,
                   &vp9_lpf_horizontal_8_dual_c, 8),
        make_tuple(&vp9_lpf_vertical_8_dual_neon,
                   &vp9_lpf_vertical_8_dual_c, 8),
#endif  // HAVE_NEON_ASM
        make_tuple(&vp9_lpf_horizontal_4_dual_neon,
                   &vp9_lpf_horizontal_4_dual_c, 8),
        make_tuple(&vp9_lpf_vertical_4_dual_neon,
                   &vp9_lpf_vertical_4_dual_c, 8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_NEON
667
668 }  // namespace