From f67d740b342f3a0d7d0605bfb0a13db0987e57e7 Mon Sep 17 00:00:00 2001
From: Frank Galligan
Date: Thu, 31 Jan 2013 15:36:55 -0800
Subject: [PATCH] Add support for x64 and win64 yasm flags.

Some projects must define only win64 for Windows 64bit builds using yasm.

Change-Id: I1d09590d66a7bfc8b4412e1cc8685978ac60b748
---
 third_party/x86inc/x86inc.asm            |  2 ++
 vp8/common/x86/loopfilter_block_sse2.asm |  8 ++++----
 vp8/common/x86/sad_sse3.asm              |  8 ++++----
 vp8/encoder/x86/dct_sse2.asm             |  4 ++--
 vp8/encoder/x86/quantize_sse2.asm        | 12 ++++++------
 vp8/encoder/x86/quantize_sse4.asm        |  6 +++---
 vp8/encoder/x86/quantize_ssse3.asm       |  6 +++---
 vp9/encoder/x86/vp9_dct_sse2.asm         |  4 ++--
 vp9/encoder/x86/vp9_quantize_sse2.asm    | 12 ++++++------
 vp9/encoder/x86/vp9_quantize_sse4.asm    |  6 +++---
 vp9/encoder/x86/vp9_quantize_ssse3.asm   |  6 +++---
 vp9/encoder/x86/vp9_sad_sse3.asm         |  8 ++++----
 vpx_ports/emms.asm                       |  2 +-
 vpx_ports/x86_abi_support.asm            | 21 ++++++++++++++++-----
 14 files changed, 59 insertions(+), 46 deletions(-)

diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm
index d9927ec..81f2311 100644
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -61,6 +61,8 @@
     %define mangle(x) x
 %elifidn __OUTPUT_FORMAT__,x64
     %define mangle(x) x
+%elifidn __OUTPUT_FORMAT__,win64
+    %define mangle(x) x
 %else
     %define mangle(x) _ %+ x
 %endif
diff --git a/vp8/common/x86/loopfilter_block_sse2.asm b/vp8/common/x86/loopfilter_block_sse2.asm
index 3d45c61..6d5aaa1 100644
--- a/vp8/common/x86/loopfilter_block_sse2.asm
+++ b/vp8/common/x86/loopfilter_block_sse2.asm
@@ -136,7 +136,7 @@
 global sym(vp8_loop_filter_bh_y_sse2) PRIVATE
 sym(vp8_loop_filter_bh_y_sse2):
 
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
     %define src      rcx ; src_ptr
     %define stride   rdx ; src_pixel_step
     %define blimit   r8
@@ -256,7 +256,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2
         movdqa      i12, xmm3
         movdqa      i13, xmm8
 
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
     pop    r13
     pop    r12
     RESTORE_XMM
@@ -278,7 +278,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2
 global sym(vp8_loop_filter_bv_y_sse2) PRIVATE
 sym(vp8_loop_filter_bv_y_sse2):
 
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
     %define src      rcx ; src_ptr
     %define stride   rdx ; src_pixel_step
     %define blimit   r8
@@ -779,7 +779,7 @@ LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2
     ; un-ALIGN_STACK
     pop    rsp
 
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
     pop    r13
     pop    r12
     RESTORE_XMM
diff --git a/vp8/common/x86/sad_sse3.asm b/vp8/common/x86/sad_sse3.asm
index f90a589..69c8d37 100644
--- a/vp8/common/x86/sad_sse3.asm
+++ b/vp8/common/x86/sad_sse3.asm
@@ -33,7 +33,7 @@
         movsxd      rax, dword ptr arg(1) ; src_stride
         movsxd      rdx, dword ptr arg(3) ; ref_stride
   %else
-    %ifidn __OUTPUT_FORMAT__,x64
+    %if LIBVPX_YASM_WIN64
       SAVE_XMM 7, u
       %define     src_ptr     rcx
       %define     src_stride  rdx
@@ -76,7 +76,7 @@
     pop         rsi
     pop         rbp
   %else
-    %ifidn __OUTPUT_FORMAT__,x64
+    %if LIBVPX_YASM_WIN64
       RESTORE_XMM
     %endif
   %endif
@@ -111,7 +111,7 @@
         xchg        rbx, rax
 
   %else
-    %ifidn __OUTPUT_FORMAT__,x64
+    %if LIBVPX_YASM_WIN64
       SAVE_XMM 7, u
       %define     src_ptr     rcx
       %define     src_stride  rdx
@@ -156,7 +156,7 @@
     pop         rsi
     pop         rbp
   %else
-    %ifidn __OUTPUT_FORMAT__,x64
+    %if LIBVPX_YASM_WIN64
       pop         rsi
       RESTORE_XMM
     %endif
diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index d880ce0..d06bca5 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -29,7 +29,7 @@
     movsxd      rax, dword ptr arg(2)
     lea         rcx, [rsi + rax*2]
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     %define     input       rcx
     %define     output      rdx
     %define     pitch       r8
@@ -53,7 +53,7 @@
     RESTORE_GOT
     pop         rbp
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     RESTORE_XMM
   %endif
 %endif
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
index fe9464b..3f48dfc 100644
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -27,7 +27,7 @@ sym(vp8_regular_quantize_b_sse2):
     push        rdi
     push        rsi
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     push        rdi
     push        rsi
   %endif
@@ -46,7 +46,7 @@ sym(vp8_regular_quantize_b_sse2):
     mov         rdi, arg(0)                 ; BLOCK *b
     mov         rsi, arg(1)                 ; BLOCKD *d
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     mov         rdi, rcx                    ; BLOCK *b
     mov         rsi, rdx                    ; BLOCKD *d
   %else
@@ -226,7 +226,7 @@ ZIGZAG_LOOP 15
     pop         rsi
     pop         rdi
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     pop         rsi
     pop         rdi
   %endif
@@ -250,7 +250,7 @@ sym(vp8_fast_quantize_b_sse2):
     push        rdi
     push        rsi
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     push        rdi
     push        rsi
   %else
@@ -264,7 +264,7 @@ sym(vp8_fast_quantize_b_sse2):
     mov         rdi, arg(0)                 ; BLOCK *b
     mov         rsi, arg(1)                 ; BLOCKD *d
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     mov         rdi, rcx                    ; BLOCK *b
     mov         rsi, rdx                    ; BLOCKD *d
   %else
@@ -367,7 +367,7 @@ sym(vp8_fast_quantize_b_sse2):
     pop         rsi
     pop         rdi
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     pop         rsi
     pop         rdi
   %endif
diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm
index f211464..dbd171b 100644
--- a/vp8/encoder/x86/quantize_sse4.asm
+++ b/vp8/encoder/x86/quantize_sse4.asm
@@ -31,7 +31,7 @@ sym(vp8_regular_quantize_b_sse4):
     %define stack_size 32
     sub         rsp, stack_size
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     SAVE_XMM 8, u
     push        rdi
     push        rsi
@@ -43,7 +43,7 @@ sym(vp8_regular_quantize_b_sse4):
     mov         rdi, arg(0)                 ; BLOCK *b
     mov         rsi, arg(1)                 ; BLOCKD *d
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     mov         rdi, rcx                    ; BLOCK *b
     mov         rsi, rdx                    ; BLOCKD *d
   %else
@@ -240,7 +240,7 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
     pop         rbp
 %else
   %undef xmm5
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     pop         rsi
     pop         rdi
     RESTORE_XMM
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
index 3536889..7b1dc11 100644
--- a/vp8/encoder/x86/quantize_ssse3.asm
+++ b/vp8/encoder/x86/quantize_ssse3.asm
@@ -27,7 +27,7 @@ sym(vp8_fast_quantize_b_ssse3):
     push        rdi
     push        rsi
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     push        rdi
     push        rsi
   %endif
@@ -38,7 +38,7 @@ sym(vp8_fast_quantize_b_ssse3):
     mov         rdi, arg(0)                 ; BLOCK *b
     mov         rsi, arg(1)                 ; BLOCKD *d
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     mov         rdi, rcx                    ; BLOCK *b
     mov         rsi, rdx                    ; BLOCKD *d
   %else
@@ -122,7 +122,7 @@ sym(vp8_fast_quantize_b_ssse3):
     pop         rsi
     pop         rdi
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     pop         rsi
     pop         rdi
   %endif
diff --git a/vp9/encoder/x86/vp9_dct_sse2.asm b/vp9/encoder/x86/vp9_dct_sse2.asm
index 57b81a5..bbd6086 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.asm
+++ b/vp9/encoder/x86/vp9_dct_sse2.asm
@@ -29,7 +29,7 @@
     movsxd      rax, dword ptr arg(2)
     lea         rcx, [rsi + rax*2]
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     %define     input       rcx
     %define     output      rdx
     %define     pitch       r8
@@ -53,7 +53,7 @@
     RESTORE_GOT
     pop         rbp
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     RESTORE_XMM
   %endif
 %endif
diff --git a/vp9/encoder/x86/vp9_quantize_sse2.asm b/vp9/encoder/x86/vp9_quantize_sse2.asm
index 060acc2..2a686f5 100644
--- a/vp9/encoder/x86/vp9_quantize_sse2.asm
+++ b/vp9/encoder/x86/vp9_quantize_sse2.asm
@@ -27,7 +27,7 @@ sym(vp9_regular_quantize_b_sse2):
     push        rdi
     push        rsi
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     push        rdi
     push        rsi
   %endif
@@ -46,7 +46,7 @@ sym(vp9_regular_quantize_b_sse2):
     mov         rdi, arg(0)                 ; BLOCK *b
     mov         rsi, arg(1)                 ; BLOCKD *d
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     mov         rdi, rcx                    ; BLOCK *b
     mov         rsi, rdx                    ; BLOCKD *d
   %else
@@ -223,7 +223,7 @@ ZIGZAG_LOOP 15
     pop         rsi
     pop         rdi
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     pop         rsi
     pop         rdi
   %endif
@@ -247,7 +247,7 @@ sym(vp9_fast_quantize_b_sse2):
     push        rdi
     push        rsi
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     push        rdi
     push        rsi
   %else
@@ -261,7 +261,7 @@ sym(vp9_fast_quantize_b_sse2):
     mov         rdi, arg(0)                 ; BLOCK *b
     mov         rsi, arg(1)                 ; BLOCKD *d
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     mov         rdi, rcx                    ; BLOCK *b
     mov         rsi, rdx                    ; BLOCKD *d
   %else
@@ -361,7 +361,7 @@ sym(vp9_fast_quantize_b_sse2):
     pop         rsi
     pop         rdi
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     pop         rsi
     pop         rdi
   %endif
diff --git a/vp9/encoder/x86/vp9_quantize_sse4.asm b/vp9/encoder/x86/vp9_quantize_sse4.asm
index 1d43ce9..d7779bd 100644
--- a/vp9/encoder/x86/vp9_quantize_sse4.asm
+++ b/vp9/encoder/x86/vp9_quantize_sse4.asm
@@ -31,7 +31,7 @@ sym(vp9_regular_quantize_b_sse4):
     %define stack_size 32
     sub         rsp, stack_size
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     SAVE_XMM 8, u
     push        rdi
     push        rsi
@@ -43,7 +43,7 @@ sym(vp9_regular_quantize_b_sse4):
     mov         rdi, arg(0)                 ; BLOCK *b
     mov         rsi, arg(1)                 ; BLOCKD *d
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     mov         rdi, rcx                    ; BLOCK *b
     mov         rsi, rdx                    ; BLOCKD *d
   %else
@@ -238,7 +238,7 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
     pop         rbp
 %else
   %undef xmm5
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     pop         rsi
     pop         rdi
     RESTORE_XMM
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
index 41edbc1..e082af1 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm
@@ -27,7 +27,7 @@ sym(vp9_fast_quantize_b_ssse3):
     push        rdi
     push        rsi
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     push        rdi
     push        rsi
   %endif
@@ -38,7 +38,7 @@ sym(vp9_fast_quantize_b_ssse3):
     mov         rdi, arg(0)                 ; BLOCK *b
     mov         rsi, arg(1)                 ; BLOCKD *d
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     mov         rdi, rcx                    ; BLOCK *b
     mov         rsi, rdx                    ; BLOCKD *d
   %else
@@ -122,7 +122,7 @@ sym(vp9_fast_quantize_b_ssse3):
     pop         rsi
     pop         rdi
 %else
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     pop         rsi
     pop         rdi
   %endif
diff --git a/vp9/encoder/x86/vp9_sad_sse3.asm b/vp9/encoder/x86/vp9_sad_sse3.asm
index 2c409cb..1c39a08 100644
--- a/vp9/encoder/x86/vp9_sad_sse3.asm
+++ b/vp9/encoder/x86/vp9_sad_sse3.asm
@@ -33,7 +33,7 @@
         movsxd      rax, dword ptr arg(1) ; src_stride
         movsxd      rdx, dword ptr arg(3) ; ref_stride
   %else
-    %ifidn __OUTPUT_FORMAT__,x64
+    %if LIBVPX_YASM_WIN64
       SAVE_XMM 7, u
      %define     src_ptr     rcx
       %define     src_stride  rdx
@@ -76,7 +76,7 @@
     pop         rsi
     pop         rbp
   %else
-    %ifidn __OUTPUT_FORMAT__,x64
+    %if LIBVPX_YASM_WIN64
      RESTORE_XMM
     %endif
   %endif
@@ -111,7 +111,7 @@
         xchg        rbx, rax
 
   %else
-    %ifidn __OUTPUT_FORMAT__,x64
+    %if LIBVPX_YASM_WIN64
       SAVE_XMM 7, u
       %define     src_ptr     rcx
       %define     src_stride  rdx
@@ -156,7 +156,7 @@
     pop         rsi
     pop         rbp
   %else
-    %ifidn __OUTPUT_FORMAT__,x64
+    %if LIBVPX_YASM_WIN64
       pop         rsi
       RESTORE_XMM
     %endif
diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm
index efad1a5..db8da28 100644
--- a/vpx_ports/emms.asm
+++ b/vpx_ports/emms.asm
@@ -18,7 +18,7 @@ sym(vpx_reset_mmx_state):
     ret
 
 
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
 global sym(vpx_winx64_fldcw) PRIVATE
 sym(vpx_winx64_fldcw):
     sub   rsp, 8
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index 13a609d..eccbfa3 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -78,6 +78,17 @@
 %endif
 
 
+; LIBVPX_YASM_WIN64
+; Set LIBVPX_YASM_WIN64 if output is Windows 64bit so the code will work if x64
+; or win64 is defined on the Yasm command line.
+%ifidn __OUTPUT_FORMAT__,win64
+%define LIBVPX_YASM_WIN64 1
+%elifidn __OUTPUT_FORMAT__,x64
+%define LIBVPX_YASM_WIN64 1
+%else
+%define LIBVPX_YASM_WIN64 0
+%endif
+
 ; sym()
 ; Return the proper symbol name for the target ABI.
 ;
@@ -90,7 +101,7 @@
 %define sym(x) x
 %elifidn __OUTPUT_FORMAT__,elfx32
 %define sym(x) x
-%elifidn __OUTPUT_FORMAT__,x64
+%elif LIBVPX_YASM_WIN64
 %define sym(x) x
 %else
 %define sym(x) _ %+ x
@@ -114,7 +125,7 @@
     %define PRIVATE :hidden
   %elifidn __OUTPUT_FORMAT__,elfx32
     %define PRIVATE :hidden
-  %elifidn __OUTPUT_FORMAT__,x64
+  %elif LIBVPX_YASM_WIN64
     %define PRIVATE
   %else
     %define PRIVATE :private_extern
@@ -131,7 +142,7 @@
 %else
   ; 64 bit ABI passes arguments in registers. This is a workaround to get up
   ; and running quickly. Relies on SHADOW_ARGS_TO_STACK
-  %ifidn __OUTPUT_FORMAT__,x64
+  %if LIBVPX_YASM_WIN64
     %define arg(x) [rbp+16+8*x]
   %else
     %define arg(x) [rbp-8-8*x]
@@ -257,7 +268,7 @@
   %endm
   %define UNSHADOW_ARGS
 %else
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
 %macro SHADOW_ARGS_TO_STACK 1 ; argc
   %if %1 > 0
         mov arg(0),rcx
@@ -313,7 +324,7 @@
 ; Win64 ABI requires 16 byte stack alignment, but then pushes an 8 byte return
 ; value. Typically we follow this up with 'push rbp' - re-aligning the stack -
 ; but in some cases this is not done and unaligned movs must be used.
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
 %macro SAVE_XMM 1-2 a
   %if %1 < 6
     %error Only xmm registers 6-15 must be preserved
-- 
2.7.4
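
Note on usage (not part of the patch): the sketch below illustrates how a
downstream .asm file might branch on the new LIBVPX_YASM_WIN64 flag once
vpx_ports/x86_abi_support.asm defines it. The file name, symbol name, and
register choice are hypothetical, and a 64-bit target is assumed; the two
yasm invocations in the comments are the command lines the new check is
intended to cover.

    ; example_abi.asm -- hypothetical consumer of LIBVPX_YASM_WIN64.
    ; Either assumed invocation should select the same code path:
    ;   yasm -f win64 -o example_abi.obj example_abi.asm
    ;   yasm -f x64   -o example_abi.obj example_abi.asm
    %include "vpx_ports/x86_abi_support.asm"

    global sym(example_first_arg) PRIVATE
    sym(example_first_arg):
    %if LIBVPX_YASM_WIN64
        mov     rax, rcx        ; Win64 ABI: first integer argument in rcx
    %else
        mov     rax, rdi        ; SysV AMD64 ABI: first integer argument in rdi
    %endif
        ret

As the comment added to x86_abi_support.asm notes, either x64 or win64 may
be given as the Yasm output format for a Windows 64-bit build; folding both
spellings into LIBVPX_YASM_WIN64 lets each call site test a single flag.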