#include "vpx_ports/mem.h"
namespace {
-typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
- int w, int h);
+typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h);
struct ConvolveFunctions {
- ConvolveFunctions(convolve_fn_t h8, convolve_fn_t h8_avg,
- convolve_fn_t v8, convolve_fn_t v8_avg,
- convolve_fn_t hv8, convolve_fn_t hv8_avg)
+ ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg,
+ ConvolveFunc v8, ConvolveFunc v8_avg,
+ ConvolveFunc hv8, ConvolveFunc hv8_avg)
: h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg),
hv8_avg_(hv8_avg) {}
- convolve_fn_t h8_;
- convolve_fn_t v8_;
- convolve_fn_t hv8_;
- convolve_fn_t h8_avg_;
- convolve_fn_t v8_avg_;
- convolve_fn_t hv8_avg_;
+ ConvolveFunc h8_;
+ ConvolveFunc v8_;
+ ConvolveFunc hv8_;
+ ConvolveFunc h8_avg_;
+ ConvolveFunc v8_avg_;
+ ConvolveFunc hv8_avg_;
};
-typedef std::tr1::tuple<int, int, const ConvolveFunctions*> convolve_param_t;
+typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
#define VP9_FILTER_WEIGHT 128
output_width, output_height);
}
-class ConvolveTest : public ::testing::TestWithParam<convolve_param_t> {
+class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
public:
static void SetUpTestCase() {
// Force input_ to be unaligned, output to be 16 byte aligned.
}
::libvpx_test::ACMRandom prng;
- for (int i = 0; i < kInputBufferSize; ++i)
- input_[i] = prng.Rand8Extremes();
+ for (int i = 0; i < kInputBufferSize; ++i) {
+ if (i & 1)
+ input_[i] = 255;
+ else
+ input_[i] = prng.Rand8Extremes();
+ }
}
void SetConstantInput(int value) {
uint8_t* const out = output();
DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
- REGISTER_STATE_CHECK(
+ ASM_REGISTER_STATE_CHECK(
UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
Width(), Height()));
uint8_t* const out = output();
DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
- REGISTER_STATE_CHECK(
+ ASM_REGISTER_STATE_CHECK(
UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
Width(), Height()));
uint8_t* const out = output();
DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
- REGISTER_STATE_CHECK(
+ ASM_REGISTER_STATE_CHECK(
UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
Width(), Height()));
for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
const InterpKernel *filters =
vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
+ const InterpKernel *const eighttap_smooth =
+ vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
+
for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
filter_block2d_8_c(in, kInputStride,
ref, kOutputStride,
Width(), Height());
- if (filters == vp9_sub_pel_filters_8lp || (filter_x && filter_y))
- REGISTER_STATE_CHECK(
+ if (filters == eighttap_smooth || (filter_x && filter_y))
+ ASM_REGISTER_STATE_CHECK(
UUT_->hv8_(in, kInputStride, out, kOutputStride,
filters[filter_x], 16, filters[filter_y], 16,
Width(), Height()));
else if (filter_y)
- REGISTER_STATE_CHECK(
+ ASM_REGISTER_STATE_CHECK(
UUT_->v8_(in, kInputStride, out, kOutputStride,
kInvalidFilter, 16, filters[filter_y], 16,
Width(), Height()));
else
- REGISTER_STATE_CHECK(
+ ASM_REGISTER_STATE_CHECK(
UUT_->h8_(in, kInputStride, out, kOutputStride,
filters[filter_x], 16, kInvalidFilter, 16,
Width(), Height()));
for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
const InterpKernel *filters =
vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
+ const InterpKernel *const eighttap_smooth =
+ vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
ref, kOutputStride,
Width(), Height());
- if (filters == vp9_sub_pel_filters_8lp || (filter_x && filter_y))
- REGISTER_STATE_CHECK(
+ if (filters == eighttap_smooth || (filter_x && filter_y))
+ ASM_REGISTER_STATE_CHECK(
UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
filters[filter_x], 16, filters[filter_y], 16,
Width(), Height()));
else if (filter_y)
- REGISTER_STATE_CHECK(
+ ASM_REGISTER_STATE_CHECK(
UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
filters[filter_x], 16, filters[filter_y], 16,
Width(), Height()));
else
- REGISTER_STATE_CHECK(
+ ASM_REGISTER_STATE_CHECK(
UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
filters[filter_x], 16, filters[filter_y], 16,
Width(), Height()));
*/
/* Test the horizontal filter. */
- REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
- kChangeFilters[kInitialSubPelOffset],
- kInputPixelStep, NULL, 0, Width(), Height()));
+ ASM_REGISTER_STATE_CHECK(
+ UUT_->h8_(in, kInputStride, out, kOutputStride,
+ kChangeFilters[kInitialSubPelOffset],
+ kInputPixelStep, NULL, 0, Width(), Height()));
for (int x = 0; x < Width(); ++x) {
const int kFilterPeriodAdjust = (x >> 3) << 3;
}
/* Test the vertical filter. */
- REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
- NULL, 0, kChangeFilters[kInitialSubPelOffset],
- kInputPixelStep, Width(), Height()));
+ ASM_REGISTER_STATE_CHECK(
+ UUT_->v8_(in, kInputStride, out, kOutputStride,
+ NULL, 0, kChangeFilters[kInitialSubPelOffset],
+ kInputPixelStep, Width(), Height()));
for (int y = 0; y < Height(); ++y) {
const int kFilterPeriodAdjust = (y >> 3) << 3;
}
/* Test the horizontal and vertical filters in combination. */
- REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
- kChangeFilters[kInitialSubPelOffset],
- kInputPixelStep,
- kChangeFilters[kInitialSubPelOffset],
- kInputPixelStep,
- Width(), Height()));
+ ASM_REGISTER_STATE_CHECK(
+ UUT_->hv8_(in, kInputStride, out, kOutputStride,
+ kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
+ kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
+ Width(), Height()));
for (int y = 0; y < Height(); ++y) {
const int kFilterPeriodAdjustY = (y >> 3) << 3;
TEST_P(ConvolveTest, CheckScalingFiltering) {
uint8_t* const in = input();
uint8_t* const out = output();
+ const InterpKernel *const eighttap = vp9_get_interp_kernel(EIGHTTAP);
SetConstantInput(127);
for (int frac = 0; frac < 16; ++frac) {
for (int step = 1; step <= 32; ++step) {
/* Test the horizontal and vertical filters in combination. */
- REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
- vp9_sub_pel_filters_8[frac], step,
- vp9_sub_pel_filters_8[frac], step,
- Width(), Height()));
+ ASM_REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
+ eighttap[frac], step,
+ eighttap[frac], step,
+ Width(), Height()));
CheckGuardBlocks();
make_tuple(64, 64, &convolve8_ssse3)));
#endif
+#if HAVE_AVX2
+// TODO(jzern): these prototypes can be removed after the avx2 versions are
+// reenabled in vp9_rtcd_defs.pl.
+extern "C" {
+void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+}
+
+const ConvolveFunctions convolve8_avx2(
+ vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
+ vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
+ vp9_convolve8_avx2, vp9_convolve8_avg_ssse3);
+
+INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
+ make_tuple(4, 4, &convolve8_avx2),
+ make_tuple(8, 4, &convolve8_avx2),
+ make_tuple(4, 8, &convolve8_avx2),
+ make_tuple(8, 8, &convolve8_avx2),
+ make_tuple(8, 16, &convolve8_avx2)));
+
+INSTANTIATE_TEST_CASE_P(DISABLED_AVX2, ConvolveTest, ::testing::Values(
+ make_tuple(16, 8, &convolve8_avx2),
+ make_tuple(16, 16, &convolve8_avx2),
+ make_tuple(32, 16, &convolve8_avx2),
+ make_tuple(16, 32, &convolve8_avx2),
+ make_tuple(32, 32, &convolve8_avx2),
+ make_tuple(64, 32, &convolve8_avx2),
+ make_tuple(32, 64, &convolve8_avx2),
+ make_tuple(64, 64, &convolve8_avx2)));
+#endif
+
#if HAVE_NEON_ASM
const ConvolveFunctions convolve8_neon(
vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,