From e07e74fb0f548a2ceb72da4d3264541ffc807db4 Mon Sep 17 00:00:00 2001
From: Linfeng Zhang
Date: Tue, 14 Feb 2017 15:39:37 -0800
Subject: [PATCH] Add vpx_highbd_idct16x16_38_add_c()

When eob is less than or equal to 38 for high-bitdepth 16x16 idct,
call this function.

BUG=webm:1301

Change-Id: I09167f89d29c401f9c36710b0fd2d02644052060
---
Review notes (not part of the commit message; text between the "---" line
and the first "diff --git" line is not applied):

* vpx_highbd_idct16x16_38_add_c() runs the 16-point row transform on the
  first 8 rows only. `out` is zero-initialised, so rows 8..15 feed zeros
  into the column pass, which still covers all 16 columns. Each column
  result is rounded with ROUND_POWER_OF_TWO(x, 6) and accumulated into the
  destination through highbd_clip_pixel_add().
* vp9_highbd_idct16x16_add() selects the new function for eob <= 38, after
  the eob <= 10 case and before the 256 fallback.
* In the second rtcd hunk the neon and sse2 specialisations of
  vpx_highbd_idct16x16_38_add are pointed at the existing 256_add
  implementations.
* The test hunk adds three TX_16X16 tuples carrying the value 38 that
  differ only in the next field (8, 10, 12) - presumably the bit depth;
  confirm against PartialInvTxfmParam.
* NOTE(review): line breaks in this copy were reconstructed from a
  whitespace-collapsed dump. The breaks agree with every hunk header and
  with the diffstat, but leading whitespace is a best guess, so apply with
  `git apply --ignore-whitespace` or verify against the upstream commit.
  The author address and the template arguments of highbd_wrapper look
  like they were stripped from the dump - verify before applying.

 test/partial_idct_test.cc    |  9 +++++++++
 vp9/common/vp9_idct.c        |  2 ++
 vpx_dsp/inv_txfm.c           | 29 +++++++++++++++++++++++++++++
 vpx_dsp/vpx_dsp_rtcd_defs.pl |  7 +++++++
 4 files changed, 47 insertions(+)

diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
index 14d09f6..31b78e3 100644
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -354,6 +354,15 @@ const PartialInvTxfmParam c_partial_idct_tests[] = {
       &highbd_wrapper, TX_16X16, 256, 12, 2),
   make_tuple(
       &vpx_highbd_fdct16x16_c, &highbd_wrapper,
+      &highbd_wrapper, TX_16X16, 38, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper,
+      &highbd_wrapper, TX_16X16, 38, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper,
+      &highbd_wrapper, TX_16X16, 38, 12, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper,
       &highbd_wrapper, TX_16X16, 10, 8, 2),
   make_tuple(
       &vpx_highbd_fdct16x16_c, &highbd_wrapper,
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index 9340d5d..23cbe9b 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -349,6 +349,8 @@ void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
     vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
   } else if (eob <= 10) {
     vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
+  } else if (eob <= 38) {
+    vpx_highbd_idct16x16_38_add(input, dest, stride, bd);
   } else {
     vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
   }
diff --git a/vpx_dsp/inv_txfm.c b/vpx_dsp/inv_txfm.c
index 5cfc8e0..555205e 100644
--- a/vpx_dsp/inv_txfm.c
+++ b/vpx_dsp/inv_txfm.c
@@ -2082,6 +2082,35 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
   output[15] = HIGHBD_WRAPLOW(-x1, bd);
 }
 
+void vpx_highbd_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest8,
+                                   int stride, int bd) {
+  int i, j;
+  tran_low_t out[16 * 16] = { 0 };
+  tran_low_t *outptr = out;
+  tran_low_t temp_in[16], temp_out[16];
+  uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8);
+
+  // First transform rows. Since all non-zero dct coefficients are in
+  // upper-left 8x8 area, we only need to calculate first 8 rows here.
+  for (i = 0; i < 8; ++i) {
+    vpx_highbd_idct16_c(input, outptr, bd);
+    input += 16;
+    outptr += 16;
+  }
+
+  // Then transform columns
+  for (i = 0; i < 16; ++i) {
+    uint16_t *destT = dest;
+    for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
+    vpx_highbd_idct16_c(temp_in, temp_out, bd);
+    for (j = 0; j < 16; ++j) {
+      destT[i] = highbd_clip_pixel_add(destT[i],
+                                       ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+      destT += stride;
+    }
+  }
+}
+
 void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
                                    int stride, int bd) {
   int i, j;
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 47616d6..0f8f508 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -673,6 +673,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 
     add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
 
+    add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+
     add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
   } else {
     add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
@@ -730,6 +732,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
     add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
     specialize qw/vpx_highbd_idct16x16_256_add neon sse2/;
 
+    add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
+    specialize qw/vpx_highbd_idct16x16_38_add neon sse2/;
+    $vpx_highbd_idct16x16_38_add_neon=vpx_highbd_idct16x16_256_add_neon;
+    $vpx_highbd_idct16x16_38_add_sse2=vpx_highbd_idct16x16_256_add_sse2;
+
     add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
     specialize qw/vpx_highbd_idct16x16_10_add sse2/;
   } # CONFIG_EMULATE_HARDWARE
-- 
2.7.4