From: Johann Date: Fri, 18 Nov 2011 19:50:13 +0000 (-0800) Subject: Move shared data to shared location X-Git-Tag: 1.0_branch~214^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f2cd4ded2248e3d7532f30c68cf0d83b5f3bb1f0;p=profile%2Fivi%2Flibvpx.git Move shared data to shared location Storing vp8_bilinear_filters_mmx in an mmx file and using it in an sse2 file is bad Moving towards allowing --disable-mmx Change-Id: I20493b35bdedcdcfc0915e6f05fdbe6c81a4a742 --- diff --git a/vp8/common/x86/filter_x86.c b/vp8/common/x86/filter_x86.c new file mode 100644 index 0000000..ebab814 --- /dev/null +++ b/vp8/common/x86/filter_x86.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_ports/mem.h" + +DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) = +{ + { 128, 128, 128, 128, 0, 0, 0, 0 }, + { 112, 112, 112, 112, 16, 16, 16, 16 }, + { 96, 96, 96, 96, 32, 32, 32, 32 }, + { 80, 80, 80, 80, 48, 48, 48, 48 }, + { 64, 64, 64, 64, 64, 64, 64, 64 }, + { 48, 48, 48, 48, 80, 80, 80, 80 }, + { 32, 32, 32, 32, 96, 96, 96, 96 }, + { 16, 16, 16, 16, 112, 112, 112, 112 } +}; + +DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) = +{ + { 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 }, + { 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 }, + { 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 }, + { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }, + { 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 }, + { 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 }, + { 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 } +}; diff --git a/vp8/common/x86/filter_x86.h b/vp8/common/x86/filter_x86.h new file mode 100644 index 0000000..efcc4dc --- /dev/null +++ b/vp8/common/x86/filter_x86.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef FILTER_X86_H +#define FILTER_X86_H + +/* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with + * duplicated values */ +extern const short vp8_bilinear_filters_x86_4[8][8]; /* duplicated 4x */ +extern const short vp8_bilinear_filters_x86_8[8][16]; /* duplicated 8x */ + +#endif /* FILTER_X86_H */ diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm index e68d950..5528fd0 100644 --- a/vp8/common/x86/subpixel_mmx.asm +++ b/vp8/common/x86/subpixel_mmx.asm @@ -10,6 +10,7 @@ %include "vpx_ports/x86_abi_support.asm" +extern sym(vp8_bilinear_filters_x86_8) %define BLOCK_HEIGHT_WIDTH 4 @@ -222,14 +223,14 @@ sym(vp8_bilinear_predict8x8_mmx): push rdi ; end prolog - ;const short *HFilter = bilinear_filters_mmx[xoffset]; - ;const short *VFilter = bilinear_filters_mmx[yoffset]; + ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; + ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; movsxd rax, dword ptr arg(2) ;xoffset mov rdi, arg(4) ;dst_ptr ; shl rax, 5 ; offset * 32 - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] add rax, rcx ; HFilter mov rsi, arg(0) ;src_ptr ; @@ -379,13 +380,13 @@ sym(vp8_bilinear_predict8x4_mmx): push rdi ; end prolog - ;const short *HFilter = bilinear_filters_mmx[xoffset]; - ;const short *VFilter = bilinear_filters_mmx[yoffset]; + ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; + ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; movsxd rax, dword ptr arg(2) ;xoffset mov rdi, arg(4) ;dst_ptr ; - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] shl rax, 5 mov rsi, arg(0) ;src_ptr ; @@ -534,13 +535,13 @@ sym(vp8_bilinear_predict4x4_mmx): push rdi ; end prolog - ;const short *HFilter = bilinear_filters_mmx[xoffset]; - ;const short *VFilter = bilinear_filters_mmx[yoffset]; + ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; + ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; movsxd rax, dword ptr arg(2) ;xoffset mov rdi, arg(4) ;dst_ptr ; - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] shl rax, 5 add rax, rcx ; HFilter @@ -699,29 +700,3 @@ sym(vp8_six_tap_mmx): times 8 dw 0 -align 16 -global HIDDEN_DATA(sym(vp8_bilinear_filters_mmx)) -sym(vp8_bilinear_filters_mmx): - times 8 dw 128 - times 8 dw 0 - - times 8 dw 112 - times 8 dw 16 - - times 8 dw 96 - times 8 dw 32 - - times 8 dw 80 - times 8 dw 48 - - times 8 dw 64 - times 8 dw 64 - - times 8 dw 48 - times 8 dw 80 - - times 8 dw 32 - times 8 dw 96 - - times 8 dw 16 - times 8 dw 112 diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm index b62b5c6..cb550af 100644 --- a/vp8/common/x86/subpixel_sse2.asm +++ b/vp8/common/x86/subpixel_sse2.asm @@ -10,6 +10,7 @@ %include "vpx_ports/x86_abi_support.asm" +extern sym(vp8_bilinear_filters_x86_8) %define BLOCK_HEIGHT_WIDTH 4 %define VP8_FILTER_WEIGHT 128 @@ -961,7 +962,7 @@ sym(vp8_unpack_block1d16_h6_sse2): ; unsigned char *dst_ptr, ; int dst_pitch ;) -extern sym(vp8_bilinear_filters_mmx) +extern sym(vp8_bilinear_filters_x86_8) global sym(vp8_bilinear_predict16x16_sse2) sym(vp8_bilinear_predict16x16_sse2): push rbp @@ -973,10 +974,10 @@ sym(vp8_bilinear_predict16x16_sse2): push rdi ; end prolog - ;const short *HFilter = bilinear_filters_mmx[xoffset] - ;const short *VFilter = bilinear_filters_mmx[yoffset] + ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset] + ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset] - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] movsxd rax, dword ptr arg(2) ;xoffset cmp rax, 0 ;skip first_pass filter if xoffset=0 @@ -1230,7 +1231,6 @@ sym(vp8_bilinear_predict16x16_sse2): ; unsigned char *dst_ptr, ; int dst_pitch ;) -extern sym(vp8_bilinear_filters_mmx) global sym(vp8_bilinear_predict8x8_sse2) sym(vp8_bilinear_predict8x8_sse2): push rbp @@ -1245,9 +1245,9 @@ sym(vp8_bilinear_predict8x8_sse2): ALIGN_STACK 16, rax sub rsp, 144 ; reserve 144 bytes - ;const short *HFilter = bilinear_filters_mmx[xoffset] - ;const short *VFilter = bilinear_filters_mmx[yoffset] - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] + ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset] + ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] mov rsi, arg(0) ;src_ptr movsxd rdx, dword ptr arg(1) ;src_pixels_per_line diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h index 75991cc..01ec9e2 100644 --- a/vp8/common/x86/subpixel_x86.h +++ b/vp8/common/x86/subpixel_x86.h @@ -12,6 +12,8 @@ #ifndef SUBPIXEL_X86_H #define SUBPIXEL_X86_H +#include "filter_x86.h" + /* Note: * * This platform is commonly built for runtime CPU detection. If you modify diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c index bce7bc3..a623c69 100644 --- a/vp8/common/x86/vp8_asm_stubs.c +++ b/vp8/common/x86/vp8_asm_stubs.c @@ -12,9 +12,9 @@ #include "vpx_config.h" #include "vpx_ports/mem.h" #include "vp8/common/subpixel.h" +#include "filter_x86.h" extern const short vp8_six_tap_mmx[8][6*8]; -extern const short vp8_bilinear_filters_mmx[8][2*8]; extern void vp8_filter_block1d_h6_mmx ( diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c index 92b695f..e2524b4 100644 --- a/vp8/encoder/x86/variance_mmx.c +++ b/vp8/encoder/x86/variance_mmx.c @@ -12,6 +12,7 @@ #include "vp8/encoder/variance.h" #include "vp8/common/pragmas.h" #include "vpx_ports/mem.h" +#include "vp8/common/x86/filter_x86.h" extern void filter_block1d_h6_mmx ( @@ -21,7 +22,7 @@ extern void filter_block1d_h6_mmx unsigned int pixel_step, unsigned int output_height, unsigned int output_width, - short *vp7_filter + short *filter ); extern void filter_block1d_v6_mmx ( @@ -31,7 +32,7 @@ extern void filter_block1d_v6_mmx unsigned int pixel_step, unsigned int output_height, unsigned int output_width, - short *vp7_filter + short *filter ); extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr); @@ -198,24 +199,6 @@ unsigned int vp8_variance8x16_mmx( } - - -/////////////////////////////////////////////////////////////////////////// -// the mmx function that does the bilinear filtering and var calculation // -// int one pass // -/////////////////////////////////////////////////////////////////////////// -DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) = -{ - { 128, 128, 128, 128, 0, 0, 0, 0 }, - { 112, 112, 112, 112, 16, 16, 16, 16 }, - { 96, 96, 96, 96, 32, 32, 32, 32 }, - { 80, 80, 80, 80, 48, 48, 48, 48 }, - { 64, 64, 64, 64, 64, 64, 64, 64 }, - { 48, 48, 48, 48, 80, 80, 80, 80 }, - { 32, 32, 32, 32, 96, 96, 96, 96 }, - { 16, 16, 16, 16, 112, 112, 112, 112 } -}; - unsigned int vp8_sub_pixel_variance4x4_mmx ( const unsigned char *src_ptr, @@ -232,7 +215,7 @@ unsigned int vp8_sub_pixel_variance4x4_mmx vp8_filter_block2d_bil4x4_var_mmx( src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, - vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset], + vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], &xsum, &xxsum ); *sse = xxsum; @@ -257,7 +240,7 @@ unsigned int vp8_sub_pixel_variance8x8_mmx vp8_filter_block2d_bil_var_mmx( src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, 8, - vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset], + vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], &xsum, &xxsum ); *sse = xxsum; @@ -283,7 +266,7 @@ unsigned int vp8_sub_pixel_variance16x16_mmx vp8_filter_block2d_bil_var_mmx( src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, 16, - vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset], + vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], &xsum0, &xxsum0 ); @@ -291,7 +274,7 @@ unsigned int vp8_sub_pixel_variance16x16_mmx vp8_filter_block2d_bil_var_mmx( src_ptr + 8, src_pixels_per_line, dst_ptr + 8, dst_pixels_per_line, 16, - vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset], + vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], &xsum1, &xxsum1 ); @@ -336,7 +319,7 @@ unsigned int vp8_sub_pixel_variance16x8_mmx vp8_filter_block2d_bil_var_mmx( src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, 8, - vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset], + vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], &xsum0, &xxsum0 ); @@ -344,7 +327,7 @@ unsigned int vp8_sub_pixel_variance16x8_mmx vp8_filter_block2d_bil_var_mmx( src_ptr + 8, src_pixels_per_line, dst_ptr + 8, dst_pixels_per_line, 8, - vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset], + vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], &xsum1, &xxsum1 ); @@ -371,7 +354,7 @@ unsigned int vp8_sub_pixel_variance8x16_mmx vp8_filter_block2d_bil_var_mmx( src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, 16, - vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset], + vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], &xsum, &xxsum ); *sse = xxsum; diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c index 24062eb..39213b0 100644 --- a/vp8/encoder/x86/variance_sse2.c +++ b/vp8/encoder/x86/variance_sse2.c @@ -12,11 +12,12 @@ #include "vp8/encoder/variance.h" #include "vp8/common/pragmas.h" #include "vpx_ports/mem.h" +#include "vp8/common/x86/filter_x86.h" -extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); -extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); -extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); -extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); +extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter); +extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter); +extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter); +extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter); extern void vp8_filter_block2d_bil4x4_var_mmx ( @@ -135,8 +136,6 @@ void vp8_half_vert_variance16x_h_sse2 unsigned int *sumsquared ); -DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[8][8]); - unsigned int vp8_variance4x4_wmt( const unsigned char *src_ptr, int source_stride, @@ -262,7 +261,7 @@ unsigned int vp8_sub_pixel_variance4x4_wmt vp8_filter_block2d_bil4x4_var_mmx( src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, - vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset], + vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], &xsum, &xxsum ); *sse = xxsum; diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index 5c15a3e..683af34 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -72,6 +72,8 @@ VP8_COMMON_SRCS-yes += common/swapyv12buffer.c VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c VP8_COMMON_SRCS-yes += common/treecoder.c +VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.c +VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.h VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/idct_x86.h VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/subpixel_x86.h VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/recon_x86.h