From 0998a146d4f63c1758d1d1b941e7a33a1515695d Mon Sep 17 00:00:00 2001 From: Jerome Jiang Date: Wed, 22 Feb 2017 14:24:02 -0800 Subject: [PATCH] Make vp9_scale_and_extend_frame_ssse3 work for hbd when bitdepth = 8. When compiled with the high bitdepth flag, the SSSE3 version only works for bitdepth = 8. About 4x speedup for handling 1:2 down/upsampling. Validated manually for: 1) Dynamic resize for a single layer encoding 2) SVC encoding with 3 spatial layers Results are bit-exact with the patch, and the speed gain (~4x) in the scaling was verified. BUG=webm:1371 Change-Id: I1bdb5f4d4bd0df67763fc271b6aa355e60f34712 --- vp9/common/vp9_rtcd_defs.pl | 7 ++----- vp9/encoder/vp9_encoder.c | 18 +++++++++++++----- vp9/vp9cx.mk | 2 -- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 4b4c119..d3659f7 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -228,11 +228,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # # frame based scale # -if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { -} else { - add_proto qw/void vp9_scale_and_extend_frame/, "const struct yv12_buffer_config *src, struct yv12_buffer_config *dst"; - specialize qw/vp9_scale_and_extend_frame ssse3/; -} +add_proto qw/void vp9_scale_and_extend_frame/, "const struct yv12_buffer_config *src, struct yv12_buffer_config *dst"; +specialize qw/vp9_scale_and_extend_frame ssse3/; } # end encoder functions diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index aa8a27c..5d64b0a 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2406,7 +2406,8 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, vpx_extend_frame_borders(dst); } -#else +#endif // CONFIG_VP9_HIGHBITDEPTH + void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst) { const int src_w = src->y_crop_width; @@ -2444,7 +2445,6 @@ void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG 
*src, vpx_extend_frame_borders(dst); } -#endif // CONFIG_VP9_HIGHBITDEPTH static int scale_down(VP9_COMP *cpi, int q) { RATE_CONTROL *const rc = &cpi->rc; @@ -3661,8 +3661,13 @@ YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(VP9_COMMON *cm, if (cm->mi_cols * MI_SIZE != unscaled->y_width || cm->mi_rows * MI_SIZE != unscaled->y_height) { #if CONFIG_VP9_HIGHBITDEPTH - scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth); - scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth); + if (cm->bit_depth == VPX_BITS_8) { + vp9_scale_and_extend_frame(unscaled, scaled_temp); + vp9_scale_and_extend_frame(scaled_temp, scaled); + } else { + scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth); + scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth); + } #else vp9_scale_and_extend_frame(unscaled, scaled_temp); vp9_scale_and_extend_frame(scaled_temp, scaled); @@ -3682,7 +3687,10 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, #if CONFIG_VP9_HIGHBITDEPTH if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) && unscaled->y_height <= (scaled->y_height << 1)) - scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth); + if (cm->bit_depth == VPX_BITS_8) + vp9_scale_and_extend_frame(unscaled, scaled); + else + scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth); else scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth); #else diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index a54e99e..3accaf5 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -119,9 +119,7 @@ endif VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_intrin_sse2.c VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c -ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_frame_scale_ssse3.c -endif ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes) VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_denoiser_sse2.c -- 2.7.4