#include "../builtins/assembly.h"
#include "../sanitizer_common/sanitizer_asm.h"
-// XRay trampolines which are not produced by intrinsics are not System V AMD64
-// ABI compliant because they are called with a stack that is always misaligned
-// by 8 bytes with respect to a 16 bytes alignment. This is because they are
-// called immediately after the call to, or immediately before returning from,
-// the function being instrumented. This saves space in the patch point, but
-// misaligns the stack by 8 bytes.
-
-.macro ALIGN_STACK_16B
- subq $8, %rsp
- CFI_ADJUST_CFA_OFFSET(8)
-.endm
-.macro RESTORE_STACK_ALIGNMENT
- addq $8, %rsp
- CFI_ADJUST_CFA_OFFSET(-8)
-.endm
-// This macro should keep the stack aligned to 16 bytes.
.macro SAVE_REGISTERS
pushfq
- CFI_ADJUST_CFA_OFFSET(8)
subq $240, %rsp
- CFI_ADJUST_CFA_OFFSET(240)
+ CFI_DEF_CFA_OFFSET(248)
movq %rbp, 232(%rsp)
movupd %xmm0, 216(%rsp)
movupd %xmm1, 200(%rsp)
movq %r15, 0(%rsp)
.endm
-// This macro should keep the stack aligned to 16 bytes.
.macro RESTORE_REGISTERS
movq 232(%rsp), %rbp
movupd 216(%rsp), %xmm0
movq 8(%rsp), %r14
movq 0(%rsp), %r15
addq $240, %rsp
- CFI_ADJUST_CFA_OFFSET(-240)
popfq
- CFI_ADJUST_CFA_OFFSET(-8)
+ CFI_DEF_CFA_OFFSET(8)
+.endm
+
+.macro ALIGNED_CALL_RAX
+ // Call the logging handler, after aligning the stack to a 16-byte boundary.
+ // The approach we're taking here uses additional stack space to stash the
+ // stack pointer twice before aligning the pointer to 16-bytes. If the stack
+ // was 8-byte aligned, it will become 16-byte aligned -- when restoring the
+ // pointer, we can always look -8 bytes from the current position to get
+ // either of the values we've stashed in the first place.
+ pushq %rsp
+ pushq (%rsp)
+ andq $-0x10, %rsp
+ callq *%rax
+ movq 8(%rsp), %rsp
.endm
.text
# LLVM-MCA-BEGIN __xray_FunctionEntry
ASM_SYMBOL(__xray_FunctionEntry):
CFI_STARTPROC
- ALIGN_STACK_16B
SAVE_REGISTERS
// This load has to be atomic, it's concurrent with __xray_patch().
// The patched function prologue puts its xray_instr_map index into %r10d.
movl %r10d, %edi
xor %esi,%esi
- callq *%rax
+ ALIGNED_CALL_RAX
.Ltmp0:
RESTORE_REGISTERS
- RESTORE_STACK_ALIGNMENT
retq
# LLVM-MCA-END
ASM_SIZE(__xray_FunctionEntry)
# LLVM-MCA-BEGIN __xray_FunctionExit
ASM_SYMBOL(__xray_FunctionExit):
CFI_STARTPROC
- ALIGN_STACK_16B
-
// Save the important registers first. Since we're assuming that this
// function is only jumped into, we only preserve the registers for
// returning.
- subq $64, %rsp
- CFI_ADJUST_CFA_OFFSET(64)
+ subq $56, %rsp
+ CFI_DEF_CFA_OFFSET(64)
movq %rbp, 48(%rsp)
movupd %xmm0, 32(%rsp)
movupd %xmm1, 16(%rsp)
movl %r10d, %edi
movl $1, %esi
- callq *%rax
+ ALIGNED_CALL_RAX
.Ltmp2:
// Restore the important registers.
movupd 16(%rsp), %xmm1
movq 8(%rsp), %rax
movq 0(%rsp), %rdx
- addq $64, %rsp
- CFI_ADJUST_CFA_OFFSET(-64)
-
- RESTORE_STACK_ALIGNMENT
+ addq $56, %rsp
+ CFI_DEF_CFA_OFFSET(8)
retq
# LLVM-MCA-END
ASM_SIZE(__xray_FunctionExit)
# LLVM-MCA-BEGIN __xray_FunctionTailExit
ASM_SYMBOL(__xray_FunctionTailExit):
CFI_STARTPROC
- ALIGN_STACK_16B
SAVE_REGISTERS
movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
movl %r10d, %edi
movl $2, %esi
- callq *%rax
+
+ ALIGNED_CALL_RAX
.Ltmp4:
RESTORE_REGISTERS
- RESTORE_STACK_ALIGNMENT
retq
# LLVM-MCA-END
ASM_SIZE(__xray_FunctionTailExit)
# LLVM-MCA-BEGIN __xray_ArgLoggerEntry
ASM_SYMBOL(__xray_ArgLoggerEntry):
CFI_STARTPROC
- ALIGN_STACK_16B
SAVE_REGISTERS
// Again, these function pointer loads must be atomic; MOV is fine.
// 32-bit function ID becomes the first
movl %r10d, %edi
-
- callq *%rax
+ ALIGNED_CALL_RAX
.Larg1entryFail:
RESTORE_REGISTERS
- RESTORE_STACK_ALIGNMENT
retq
# LLVM-MCA-END
ASM_SIZE(__xray_ArgLoggerEntry)
testq %rax,%rax
je .LcustomEventCleanup
- callq *%rax
+ ALIGNED_CALL_RAX
.LcustomEventCleanup:
RESTORE_REGISTERS
testq %rax,%rax
je .LtypedEventCleanup
- callq *%rax
+ ALIGNED_CALL_RAX
.LtypedEventCleanup:
RESTORE_REGISTERS