From 01376858cd184d820ff4c2d8390361a8679c0e87 Mon Sep 17 00:00:00 2001 From: Fritz Koenig Date: Fri, 19 Aug 2011 08:51:27 -0700 Subject: [PATCH] Reclassify optimized ssim calculations as SSE2. Calculations were incorrectly classified as either SSE3 or SSSE3. The code only uses SSE2 instructions. Clean up function names and make non-RTCD code work as well. Change-Id: I29f5c2ead342b2086a468029c15e2c1d948b5d97 --- vp8/encoder/generic/csystemdependent.c | 9 ++++---- vp8/encoder/ssim.c | 19 +++++------------ vp8/encoder/variance.h | 16 +++++++------- vp8/encoder/x86/ssim_opt.asm | 12 +++++------ vp8/encoder/x86/variance_x86.h | 10 +++++++++ vp8/encoder/x86/x86_csystemdependent.c | 38 +++++++--------------------------- 6 files changed, 41 insertions(+), 63 deletions(-) diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 9906105..a14843a 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -94,16 +94,15 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) #if !(CONFIG_REALTIME_ONLY) cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c; #endif +#if CONFIG_INTERNAL_STATS + cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_c; + cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_c; +#endif #endif // Pure C: vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame; -#if CONFIG_INTERNAL_STATS - cpi->rtcd.variance.ssimpf_8x8 = ssim_parms_8x8_c; - cpi->rtcd.variance.ssimpf = ssim_parms_c; -#endif - #if ARCH_X86 || ARCH_X86_64 vp8_arch_x86_encoder_init(cpi); #endif diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c index fea756f..d0f8e49 100644 --- a/vp8/encoder/ssim.c +++ b/vp8/encoder/ssim.c @@ -9,18 +9,9 @@ */ -#include "vpx_scale/yv12config.h" -#include "math.h" #include "onyx_int.h" -#if CONFIG_RUNTIME_CPU_DETECT -#define IF_RTCD(x) (x) -#else -#define IF_RTCD(x) NULL -#endif - - -void ssim_parms_c +void vp8_ssim_parms_16x16_c ( unsigned char *s, int sp, @@ -46,7 +37,7 @@ void 
ssim_parms_c } } } -void ssim_parms_8x8_c +void vp8_ssim_parms_8x8_c ( unsigned char *s, int sp, @@ -107,14 +98,14 @@ static double ssim_16x16(unsigned char *s,int sp, unsigned char *r,int rp, const vp8_variance_rtcd_vtable_t *rtcd) { unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0; - rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); + SSIMPF_INVOKE(rtcd,16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256); } static double ssim_8x8(unsigned char *s,int sp, unsigned char *r,int rp, const vp8_variance_rtcd_vtable_t *rtcd) { unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0; - rtcd->ssimpf_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); + SSIMPF_INVOKE(rtcd,8x8)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64); } @@ -134,7 +125,7 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp, c1 = cc1*16; c2 = cc2*16; - rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); + SSIMPF_INVOKE(rtcd,16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); ssim_n1 = (2*sum_s*sum_r+ c1); ssim_n2 =((int64_t) 2*256*sum_sxr-(int64_t) 2*sum_s*sum_r+c2); diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h index 5fd6d3a..0f35152 100644 --- a/vp8/encoder/variance.h +++ b/vp8/encoder/variance.h @@ -320,16 +320,16 @@ extern prototype_variance(vp8_variance_mse16x16); #endif extern prototype_get16x16prederror(vp8_variance_get4x4sse_cs); -#ifndef vp8_ssimpf -#define vp8_ssimpf ssim_parms_c -#endif -extern prototype_ssimpf(vp8_ssimpf) - #ifndef vp8_ssimpf_8x8 -#define vp8_ssimpf_8x8 ssim_parms_8x8_c +#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_c #endif extern prototype_ssimpf(vp8_ssimpf_8x8) +#ifndef vp8_ssimpf_16x16 +#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_c +#endif +extern prototype_ssimpf(vp8_ssimpf_16x16) + 
typedef prototype_sad(*vp8_sad_fn_t); typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t); typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t); @@ -394,7 +394,7 @@ typedef struct #if CONFIG_INTERNAL_STATS vp8_ssimpf_fn_t ssimpf_8x8; - vp8_ssimpf_fn_t ssimpf; + vp8_ssimpf_fn_t ssimpf_16x16; #endif } vp8_variance_rtcd_vtable_t; @@ -417,8 +417,10 @@ typedef struct #if CONFIG_RUNTIME_CPU_DETECT #define VARIANCE_INVOKE(ctx,fn) (ctx)->fn +#define SSIMPF_INVOKE(ctx,fn) (ctx)->fn #else #define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn +#define SSIMPF_INVOKE(ctx,fn) vp8_ssimpf_##fn #endif #endif diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt.asm index d5d267a..8af4b45 100644 --- a/vp8/encoder/x86/ssim_opt.asm +++ b/vp8/encoder/x86/ssim_opt.asm @@ -44,7 +44,7 @@ paddd %1, xmm1 SUM_ACROSS_Q %1 %endmacro -;void ssim_parms_sse3( +;void ssim_parms_sse2( ; unsigned char *s, ; int sp, ; unsigned char *r, @@ -61,8 +61,8 @@ ; or pavgb At this point this is just meant to be first pass for calculating ; all the parms needed for 16x16 ssim so we can play with dssim as distortion ; in mode selection code. -global sym(vp8_ssim_parms_16x16_sse3) -sym(vp8_ssim_parms_16x16_sse3): +global sym(vp8_ssim_parms_16x16_sse2) +sym(vp8_ssim_parms_16x16_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 9 @@ -134,7 +134,7 @@ NextRow: pop rbp ret -;void ssim_parms_sse3( +;void ssim_parms_sse2( ; unsigned char *s, ; int sp, ; unsigned char *r, @@ -151,8 +151,8 @@ NextRow: ; or pavgb At this point this is just meant to be first pass for calculating ; all the parms needed for 16x16 ssim so we can play with dssim as distortion ; in mode selection code. 
-global sym(vp8_ssim_parms_8x8_sse3) -sym(vp8_ssim_parms_8x8_sse3): +global sym(vp8_ssim_parms_8x8_sse2) +sym(vp8_ssim_parms_8x8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 9 diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h index af6c4d2..4b41b54 100644 --- a/vp8/encoder/x86/variance_x86.h +++ b/vp8/encoder/x86/variance_x86.h @@ -140,6 +140,8 @@ extern prototype_getmbss(vp8_get_mb_ss_sse2); extern prototype_variance(vp8_mse16x16_wmt); extern prototype_variance2(vp8_get8x8var_sse2); extern prototype_variance2(vp8_get16x16var_sse2); +extern prototype_ssimpf(vp8_ssim_parms_8x8_sse2) +extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2) #if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_variance_sad4x4 @@ -208,6 +210,14 @@ extern prototype_variance2(vp8_get16x16var_sse2); #undef vp8_variance_mse16x16 #define vp8_variance_mse16x16 vp8_mse16x16_wmt +#if ARCH_X86_64 +#undef vp8_ssimpf_8x8 +#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_sse2 + +#undef vp8_ssimpf_16x16 +#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_sse2 +#endif + #endif #endif diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index badb9f0..36b7b71 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -111,29 +111,6 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) #endif -#if HAVE_SSSE3 -#if CONFIG_INTERNAL_STATS -#if ARCH_X86_64 -typedef void ssimpf -( - unsigned char *s, - int sp, - unsigned char *r, - int rp, - unsigned long *sum_s, - unsigned long *sum_r, - unsigned long *sum_sq_s, - unsigned long *sum_sq_r, - unsigned long *sum_sxr -); - -extern ssimpf vp8_ssim_parms_16x16_sse3; -extern ssimpf vp8_ssim_parms_8x8_sse3; -#endif -#endif -#endif - - void vp8_arch_x86_encoder_init(VP8_COMP *cpi) { #if CONFIG_RUNTIME_CPU_DETECT @@ -246,6 +223,13 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) #if !(CONFIG_REALTIME_ONLY) cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2; #endif + 
+#if CONFIG_INTERNAL_STATS +#if ARCH_X86_64 + cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse2; + cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_sse2; +#endif +#endif } #endif @@ -280,14 +264,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3; cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3; - -#if CONFIG_INTERNAL_STATS -#if ARCH_X86_64 - cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse3; - cpi->rtcd.variance.ssimpf = vp8_ssim_parms_16x16_sse3; -#endif -#endif - } #endif -- 2.7.4