From: Dean Michael Berris Date: Wed, 15 Nov 2017 03:35:42 +0000 (+0000) Subject: [XRay][compiler-rt][x86_64] Align the stack before and after calling handlers X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6a40b2d0a5851f57541aaf8ae3d2e9dfe3f6cd2f;p=platform%2Fupstream%2Fllvm.git [XRay][compiler-rt][x86_64] Align the stack before and after calling handlers Summary: This change fixes the XRay trampolines aside from the __xray_CustomEvent trampoline to align the stack to 16-byte boundaries before calling the handler. Before this change we've not been explicitly aligning the stack to 16-byte boundaries, which makes it dangerous when calling handlers that leave the stack in a state that isn't strictly 16-byte aligned after calling the handlers. We add a test that makes sure we can handle these cases appropriately after the changes, and prevents us from regressing the state moving forward. Fixes http://llvm.org/PR35294. Reviewers: pelikan, pcc Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D40004 llvm-svn: 318261 --- diff --git a/compiler-rt/lib/xray/xray_trampoline_x86_64.S b/compiler-rt/lib/xray/xray_trampoline_x86_64.S index ffbfb5c..33fa0e7 100644 --- a/compiler-rt/lib/xray/xray_trampoline_x86_64.S +++ b/compiler-rt/lib/xray/xray_trampoline_x86_64.S @@ -60,6 +60,20 @@ .cfi_def_cfa_offset 8 .endm +.macro ALIGNED_CALL_RAX + // Call the logging handler, after aligning the stack to a 16-byte boundary. + // The approach we're taking here uses additional stack space to stash the + // stack pointer twice before aligning the pointer to 16-bytes. If the stack + // was 8-byte aligned, it will become 16-byte aligned -- when restoring the + // pointer, we can always look -8 bytes from the current position to get + // either of the values we've stashed in the first place. + pushq %rsp + pushq (%rsp) + andq $-0x10, %rsp + callq *%rax + movq 8(%rsp), %rsp +.endm + .text .file "xray_trampoline_x86.S" @@ -82,7 +96,8 @@ __xray_FunctionEntry: // The patched function prolog puts its xray_instr_map index into %r10d. movl %r10d, %edi xor %esi,%esi - callq *%rax + ALIGNED_CALL_RAX + .Ltmp0: RESTORE_REGISTERS retq @@ -113,7 +128,8 @@ __xray_FunctionExit: movl %r10d, %edi movl $1, %esi - callq *%rax + ALIGNED_CALL_RAX + .Ltmp2: // Restore the important registers. movq 48(%rsp), %rbp @@ -143,7 +159,8 @@ __xray_FunctionTailExit: movl %r10d, %edi movl $2, %esi - callq *%rax + + ALIGNED_CALL_RAX .Ltmp4: RESTORE_REGISTERS @@ -181,7 +198,7 @@ __xray_ArgLoggerEntry: // 32-bit function ID becomes the first movl %r10d, %edi - callq *%rax + ALIGNED_CALL_RAX .Larg1entryFail: RESTORE_REGISTERS @@ -207,18 +224,7 @@ __xray_CustomEvent: testq %rax,%rax je .LcustomEventCleanup - // At this point we know that rcx and rdx already has the data, so we just - // call the logging handler, after aligning the stack to a 16-byte boundary. - // The approach we're taking here uses additional stack space to stash the - // stack pointer twice before aligning the pointer to 16-bytes. If the stack - // was 8-byte aligned, it will become 16-byte aligned -- when restoring the - // pointer, we can always look -8 bytes from the current position to get - // either of the values we've stashed in the first place. - pushq %rsp - pushq (%rsp) - andq $-0x10, %rsp - callq *%rax - movq 8(%rsp), %rsp + ALIGNED_CALL_RAX .LcustomEventCleanup: RESTORE_REGISTERS diff --git a/compiler-rt/test/xray/TestCases/Linux/common-trampoline-alignment.cc b/compiler-rt/test/xray/TestCases/Linux/common-trampoline-alignment.cc new file mode 100644 index 0000000..5d1cc1e --- /dev/null +++ b/compiler-rt/test/xray/TestCases/Linux/common-trampoline-alignment.cc @@ -0,0 +1,57 @@ +// Make sure that we're aligning the stack properly to support handlers that +// expect 16-byte alignment of the stack. +// +// RUN: %clangxx_xray -std=c++11 %s -o %t +// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false" \ +// RUN: %run %t 2>&1 +// REQUIRES: x86_64-linux +// REQUIRES: built-in-llvm-tree +#include "xray/xray_interface.h" +#include +#include + +[[clang::xray_never_instrument]] __attribute__((weak)) __m128 f(__m128 *i) { + return *i; +} + +[[clang::xray_always_instrument]] __attribute__((noinline)) void noarg() { + __m128 v = {}; + f(&v); +} + +[[ clang::xray_always_instrument, clang::xray_log_args(1) ]] +__attribute__((noinline)) void arg1(int) { + __m128 v = {}; + f(&v); +} + +[[clang::xray_always_instrument]] __attribute__((noinline)) +void no_alignment() {} + +[[clang::xray_never_instrument]] void noarg_handler(int32_t, + XRayEntryType) { + printf("noarg handler called\n"); + __m128 v = {}; + f(&v); +} + +[[clang::xray_never_instrument]] void arg1_handler(int32_t, XRayEntryType, + uint64_t) { + printf("arg1 handler called\n"); + __m128 v = {}; + f(&v); +} + +int main(int argc, char *argv[]) { + __xray_set_handler(noarg_handler); + __xray_set_handler_arg1(arg1_handler); + __xray_patch(); + noarg(); // CHECK: noarg handler called + arg1(argc); // CHECK: arg1 handler called + no_alignment(); + __xray_unpatch(); + __xray_remove_handler(); + __xray_remove_handler_arg1(); + noarg(); + arg1(argc); +}