some XMM registers are non-volatile on windows x64 ABI
author Makoto Kato <makoto.kt@gmail.com>
Fri, 11 Jun 2010 09:32:28 +0000 (18:32 +0900)
committer John Koleszar <jkoleszar@google.com>
Fri, 11 Jun 2010 16:11:15 +0000 (12:11 -0400)
XMM6 to XMM15 are non-volatile on Windows x64 ABI.  We have to save
these registers.

Change-Id: I4676309f1350af25c8a35f0c81b1f0499ab99076

vp8/common/x86/iwalsh_sse2.asm
vp8/common/x86/loopfilter_sse2.asm
vp8/common/x86/postproc_sse2.asm
vp8/common/x86/recon_sse2.asm
vp8/common/x86/subpixel_sse2.asm
vpx_ports/x86_abi_support.asm

index cb61691..bb0d1d7 100644 (file)
@@ -17,6 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 2
+    SAVE_XMM
     push        rsi
     push        rdi
     ; end prolog
@@ -101,6 +102,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
index 1c0a388..d160dd6 100644 (file)
@@ -26,6 +26,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -212,6 +213,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -231,6 +233,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -652,6 +655,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -671,6 +675,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1002,6 +1007,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1021,6 +1027,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1564,6 +1571,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1583,6 +1591,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1679,6 +1688,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1698,6 +1708,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
     push        rbp         ; save old base pointer value.
     mov         rbp, rsp    ; set new base pointer value.
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx         ; save callee-saved reg
     push        rsi
     push        rdi
@@ -1942,6 +1953,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
index 5097b2a..9e56429 100644 (file)
@@ -26,6 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -240,6 +241,7 @@ acrossnextcol:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -254,6 +256,7 @@ sym(vp8_mbpost_proc_down_xmm):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -439,6 +442,7 @@ loop_row:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -452,6 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -612,6 +617,7 @@ nextcol4:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
index 2ce028c..cfdbfad 100644 (file)
@@ -67,6 +67,7 @@ sym(vp8_recon4b_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 4
+    SAVE_XMM
     push        rsi
     push        rdi
     ; end prolog
@@ -119,6 +120,7 @@ sym(vp8_recon4b_sse2):
     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
index cc2837b..b71a2f9 100644 (file)
@@ -37,6 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -129,6 +130,7 @@ filter_block1d8_h6_rowloop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -155,6 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -304,6 +307,7 @@ filter_block1d16_h6_sse2_rowloop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -329,6 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 8
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -397,6 +402,7 @@ vp8_filter_block1d8_v6_sse2_loop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -510,6 +516,7 @@ vp8_filter_block1d16_v6_sse2_loop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -641,6 +648,7 @@ sym(vp8_filter_block1d16_h6_only_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -876,6 +884,7 @@ vp8_filter_block1d8_v6_only_sse2_loop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -894,6 +903,7 @@ sym(vp8_unpack_block1d16_h6_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -933,6 +943,7 @@ unpack_block1d16_h6_sse2_rowloop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -953,6 +964,7 @@ sym(vp8_bilinear_predict16x16_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
index 6fdbf8a..7840e35 100644 (file)
   %define UNSHADOW_ARGS mov rsp, rbp
 %endif
 
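+; Win64 ABI: XMM6:XMM15 are non-volatile and must be preserved (libvpx only uses XMM6 and XMM7, so only those are saved)
+; rsp has to be 16-byte aligned when SAVE_XMM/RESTORE_XMM run, because they use aligned moves (movdqa)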
+%ifidn __OUTPUT_FORMAT__,x64
+%macro SAVE_XMM 0
+  sub rsp, 32                           ; reserve two 16-byte slots below the current rsp
+  movdqa XMMWORD PTR [rsp], xmm6        ; aligned store: requires rsp to be 16-byte aligned
+  movdqa XMMWORD PTR [rsp+16], xmm7     ; second slot holds xmm7
+%endmacro
+%macro RESTORE_XMM 0
+  movdqa xmm6, XMMWORD PTR [rsp]        ; rsp must be back at the value SAVE_XMM left it at
+  movdqa xmm7, XMMWORD PTR [rsp+16]     ; reload xmm7 from the second slot
+  add rsp, 32                           ; release the 32-byte save area
+%endmacro
+%else
+%macro SAVE_XMM 0
+%endmacro
+%macro RESTORE_XMM 0
+%endmacro
+%endif
 
 ; Name of the rodata section
 ;