[compiler-rt][XRay] re-submitting r276117, with fixes for build breakage due to extra...
authorDean Michael Berris <dberris@google.com>
Thu, 21 Jul 2016 07:39:55 +0000 (07:39 +0000)
committerDean Michael Berris <dberris@google.com>
Thu, 21 Jul 2016 07:39:55 +0000 (07:39 +0000)
Summary:
This is a fixed-up version of D21612, to address failure identified post-commit.

Original commit description:

This patch implements the initialisation and patching routines for the XRay runtime, along with the necessary trampolines for function entry/exit handling. For now we only define the basic hooks for allowing an implementation to define a handler that gets run on function entry/exit. We expose a minimal API for controlling the behaviour of the runtime (patching, cleanup, and setting the handler to invoke when instrumenting).

Fixes include:
- Gating XRay build to only Linux x86_64 and with the right dependencies in case it is the only library being built
- Including <cstddef> to fix std::size_t issue

Reviewers: kcc, rnk, echristo

Subscribers: mehdi_amini, llvm-commits

Differential Revision: https://reviews.llvm.org/D22611

llvm-svn: 276251

13 files changed:
compiler-rt/CMakeLists.txt
compiler-rt/cmake/config-ix.cmake
compiler-rt/include/CMakeLists.txt
compiler-rt/include/xray/xray_interface.h [new file with mode: 0644]
compiler-rt/lib/CMakeLists.txt
compiler-rt/lib/xray/CMakeLists.txt [new file with mode: 0644]
compiler-rt/lib/xray/xray_flags.cc [new file with mode: 0644]
compiler-rt/lib/xray/xray_flags.h [new file with mode: 0644]
compiler-rt/lib/xray/xray_flags.inc [new file with mode: 0644]
compiler-rt/lib/xray/xray_init.cc [new file with mode: 0644]
compiler-rt/lib/xray/xray_interface.cc [new file with mode: 0644]
compiler-rt/lib/xray/xray_interface_internal.h [new file with mode: 0644]
compiler-rt/lib/xray/xray_trampoline_x86.S [new file with mode: 0644]

index a067581..2f61d15 100644 (file)
@@ -37,6 +37,8 @@ option(COMPILER_RT_BUILD_BUILTINS "Build builtins" ON)
 mark_as_advanced(COMPILER_RT_BUILD_BUILTINS)
 option(COMPILER_RT_BUILD_SANITIZERS "Build sanitizers" ON)
 mark_as_advanced(COMPILER_RT_BUILD_SANITIZERS)
+option(COMPILER_RT_BUILD_XRAY "Build xray" ON)
+mark_as_advanced(COMPILER_RT_BUILD_XRAY)
 
 if (COMPILER_RT_STANDALONE_BUILD)
   if (NOT LLVM_CONFIG_PATH)
index 6865419..92c3452 100644 (file)
@@ -161,6 +161,7 @@ set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
 set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
 set(ALL_ESAN_SUPPORTED_ARCH ${X86_64})
 set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64})
+set(ALL_XRAY_SUPPORTED_ARCH ${X86_64})
 
 if(APPLE)
   include(CompilerRTDarwinUtils)
@@ -350,6 +351,9 @@ if(APPLE)
   list_intersect(SCUDO_SUPPORTED_ARCH
     ALL_SCUDO_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(XRAY_SUPPORTED_ARCH
+    ALL_XRAY_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
 else()
   # Architectures supported by compiler-rt libraries.
   filter_available_targets(SANITIZER_COMMON_SUPPORTED_ARCH
@@ -373,6 +377,7 @@ else()
   filter_available_targets(ESAN_SUPPORTED_ARCH ${ALL_ESAN_SUPPORTED_ARCH})
   filter_available_targets(SCUDO_SUPPORTED_ARCH
     ${ALL_SCUDO_SUPPORTED_ARCH})
+  filter_available_targets(XRAY_SUPPORTED_ARCH ${ALL_XRAY_SUPPORTED_ARCH})
 endif()
 
 if (MSVC)
@@ -493,3 +498,9 @@ else()
   set(COMPILER_RT_HAS_SCUDO FALSE)
 endif()
 
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND XRAY_SUPPORTED_ARCH AND
+    OS_NAME MATCHES "Linux")
+  set(COMPILER_RT_HAS_XRAY TRUE)
+else()
+  set(COMPILER_RT_HAS_XRAY FALSE)
+endif()
index 5161d4e..1f8b481 100644 (file)
@@ -10,11 +10,18 @@ set(SANITIZER_HEADERS
   sanitizer/msan_interface.h
   sanitizer/tsan_interface_atomic.h)
 
+# Public XRay headers, given relative to this directory.
+set(XRAY_HEADERS
+  xray/xray_interface.h)
+
+# Every header that is copied into the build tree below.
+set(COMPILER_RT_HEADERS
+  ${SANITIZER_HEADERS}
+  ${XRAY_HEADERS})
+
 set(output_dir ${COMPILER_RT_OUTPUT_DIR}/include)
 
 # Copy compiler-rt headers to the build tree.
 set(out_files)
-foreach( f ${SANITIZER_HEADERS} )
+foreach( f ${COMPILER_RT_HEADERS} )
   set( src ${CMAKE_CURRENT_SOURCE_DIR}/${f} )
   set( dst ${output_dir}/${f} )
   add_custom_command(OUTPUT ${dst}
@@ -32,3 +39,7 @@ set_target_properties(compiler-rt-headers PROPERTIES FOLDER "Compiler-RT Misc")
 install(FILES ${SANITIZER_HEADERS}
   PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
   DESTINATION ${COMPILER_RT_INSTALL_PATH}/include/sanitizer)
+# Install xray headers.
+install(FILES ${XRAY_HEADERS}
+  PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
+  DESTINATION ${COMPILER_RT_INSTALL_PATH}/include/xray)
diff --git a/compiler-rt/include/xray/xray_interface.h b/compiler-rt/include/xray/xray_interface.h
new file mode 100644 (file)
index 0000000..b8a0a61
--- /dev/null
@@ -0,0 +1,66 @@
+//===-- xray_interface.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// APIs for controlling XRay functionality explicitly.
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_INTERFACE_H
+#define XRAY_XRAY_INTERFACE_H
+
+#include <cstdint>
+
+extern "C" {
+
+enum XRayEntryType { ENTRY = 0, EXIT = 1 };
+
+// Provide a function to invoke for when instrumentation points are hit. This is
+// a user-visible control surface that overrides the default implementation. The
+// function provided should take the following arguments:
+//
+//   - function id: an identifier that indicates the id of a function; this id
+//                  is generated by xray; the mapping between the function id
+//                  and the actual function pointer is available through
+//                  __xray_table.
+//   - entry type: identifies what kind of instrumentation point was encountered
+//                 (function entry, function exit, etc.). See the enum
+//                 XRayEntryType for more details.
+//
+// Returns 1 on success, 0 on error.
+extern int __xray_set_handler(void (*entry)(int32_t, XRayEntryType));
+
+// This removes whatever the currently provided handler is. Returns 1 on
+// success, 0 on error.
+extern int __xray_remove_handler();
+
+enum XRayPatchingStatus {
+  NOT_INITIALIZED = 0,
+  NOTIFIED = 1,
+  ONGOING = 2,
+  FAILED = 3
+};
+
+// This tells XRay to patch the instrumentation points. This is an asynchronous
+// process, and returns the following status in specific cases:
+//
+//   - 0 : XRay is not initialized.
+//   - 1 : We've done the notification.
+//   - 2 : Patching / un-patching is on-going.
+//   - 3 : Patching failed.
+extern XRayPatchingStatus __xray_patch();
+
+// Reverses the effect of __xray_patch(). This is an asynchronous process, and
+// returns the following status in specific cases.
+//
+//   - 0 : XRay is not initialized.
+//   - 1 : We've done the notification.
+//   - 2 : Patching / un-patching is on-going.
+extern int __xray_unpatch();
+}
+
+#endif
index a2b55c4..ce96fe4 100644 (file)
@@ -4,6 +4,15 @@
 include(AddCompilerRT)
 include(SanitizerUtils)
 
+# Build sanitizer_common up front whenever we're building either the
+# sanitizers or xray (or both), since both depend on it.
+#
+# TODO: Refactor sanitizer_common into smaller pieces (e.g. flag parsing, utils).
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND
+    (COMPILER_RT_BUILD_SANITIZERS OR COMPILER_RT_BUILD_XRAY))
+  add_subdirectory(sanitizer_common)
+endif()
+
 if(COMPILER_RT_BUILD_BUILTINS)
   add_subdirectory(builtins)
 endif()
@@ -14,7 +23,6 @@ if(COMPILER_RT_BUILD_SANITIZERS)
   endif()
 
   if(COMPILER_RT_HAS_SANITIZER_COMMON)
-    add_subdirectory(sanitizer_common)
     add_subdirectory(stats)
     add_subdirectory(lsan)
     add_subdirectory(ubsan)
@@ -57,3 +65,7 @@ if(COMPILER_RT_BUILD_SANITIZERS)
     add_subdirectory(scudo)
   endif()
 endif()
+
+if(COMPILER_RT_BUILD_XRAY AND COMPILER_RT_HAS_XRAY)
+  add_subdirectory(xray)
+endif()
diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt
new file mode 100644 (file)
index 0000000..9b37a8e
--- /dev/null
@@ -0,0 +1,47 @@
+# Build for the XRay runtime support library.
+
+# Architecture-independent runtime sources.
+set(XRAY_SOURCES
+  xray_init.cc
+  xray_interface.cc
+  xray_flags.cc)
+
+# x86_64 builds add the assembly trampolines to the common sources.
+set(x86_64_SOURCES
+  xray_trampoline_x86.S
+  ${XRAY_SOURCES})
+
+include_directories(..)
+include_directories(../../include)
+
+set(XRAY_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+
+set(XRAY_COMMON_DEFINITIONS XRAY_HAS_EXCEPTIONS=1)
+
+add_compiler_rt_object_libraries(RTXray
+  ARCHS ${XRAY_SUPPORTED_ARCH}
+  SOURCES ${XRAY_SOURCES}
+  CFLAGS ${XRAY_CFLAGS}
+  DEFS ${XRAY_COMMON_DEFINITIONS})
+
+# Umbrella target that each per-arch runtime below is attached to.
+add_custom_target(xray)
+set(XRAY_COMMON_RUNTIME_OBJECT_LIBS
+  RTSanitizerCommon
+  RTSanitizerCommonLibc)
+
+foreach(arch ${XRAY_SUPPORTED_ARCH})
+  if(CAN_TARGET_${arch})
+    add_compiler_rt_runtime(clang_rt.xray
+      STATIC
+      ARCHS ${arch}
+      SOURCES ${${arch}_SOURCES}
+      CFLAGS ${XRAY_CFLAGS}
+      DEFS ${XRAY_COMMON_DEFINITIONS}
+      OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
+      PARENT_TARGET xray)
+  endif()
+endforeach()
+
+add_dependencies(compiler-rt xray)
+
+# if(COMPILER_RT_INCLUDE_TESTS)
+#   add_subdirectory(tests)
+# endif()
diff --git a/compiler-rt/lib/xray/xray_flags.cc b/compiler-rt/lib/xray/xray_flags.cc
new file mode 100644 (file)
index 0000000..6f82912
--- /dev/null
@@ -0,0 +1,61 @@
+//===-- xray_flags.cc -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay flag parsing logic.
+//===----------------------------------------------------------------------===//
+
+#include "xray_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_libc.h"
+
+using namespace __sanitizer;
+
+namespace __xray {
+
+Flags xray_flags_dont_use_directly; // use via flags().
+
+// Resets every XRay flag to its default value: each XRAY_FLAG entry in
+// xray_flags.inc expands to an assignment of DefaultValue to the member Name.
+void Flags::SetDefaults() {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+}
+
+// Registers every flag listed in xray_flags.inc with the parser P, passing the
+// flag's name, its description and the address of the matching member of F.
+static void RegisterXRayFlags(FlagParser *P, Flags *F) {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description)                       \
+  RegisterFlag(P, #Name, Description, &F->Name);
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+}
+
+// Sets up the XRay flags and the common sanitizer flags: defaults are applied
+// first and are then overridden by whatever XRAY_OPTIONS provides.
+void InitializeFlags() {
+  SetCommonFlagsDefaults();
+  auto *F = flags();
+  F->SetDefaults();
+
+  // A single parser handles both the XRay-specific and the common flags.
+  FlagParser XRayParser;
+  RegisterXRayFlags(&XRayParser, F);
+  RegisterCommonFlags(&XRayParser);
+
+  // Override the defaults with the value of XRAY_OPTIONS as returned by
+  // GetEnv().
+  XRayParser.ParseString(GetEnv("XRAY_OPTIONS"));
+
+  InitializeCommonFlags();
+
+  if (Verbosity())
+    ReportUnrecognizedFlags();
+
+  // When the common `help` flag is set, list the registered flags.
+  if (common_flags()->help) {
+    XRayParser.PrintFlagDescriptions();
+  }
+}
+
+} // namespace __xray
diff --git a/compiler-rt/lib/xray/xray_flags.h b/compiler-rt/lib/xray/xray_flags.h
new file mode 100644 (file)
index 0000000..2ecf5fb
--- /dev/null
@@ -0,0 +1,37 @@
+//===-- xray_flags.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay runtime flags.
+//===----------------------------------------------------------------------===//
+
+#ifndef XRAY_FLAGS_H
+#define XRAY_FLAGS_H
+
+#include "sanitizer_common/sanitizer_flag_parser.h"
+
+namespace __xray {
+
+// Runtime flags for XRay. The struct gets one data member per XRAY_FLAG entry
+// in xray_flags.inc, declared with that entry's Type and Name.
+struct Flags {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+
+  // Assigns every flag member its default value.
+  void SetDefaults();
+};
+
+extern Flags xray_flags_dont_use_directly;
+inline Flags *flags() { return &xray_flags_dont_use_directly; }
+
+void InitializeFlags();
+
+} // namespace __xray
+
+#endif // XRAY_FLAGS_H
diff --git a/compiler-rt/lib/xray/xray_flags.inc b/compiler-rt/lib/xray/xray_flags.inc
new file mode 100644 (file)
index 0000000..bc2f45e
--- /dev/null
@@ -0,0 +1,18 @@
+//===-- xray_flags.inc ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// XRay runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FLAG
+#error "Define XRAY_FLAG prior to including this file!"
+#endif
+
+XRAY_FLAG(bool, patch_premain, true,
+          "Whether to patch instrumentation points before main.")
diff --git a/compiler-rt/lib/xray/xray_init.cc b/compiler-rt/lib/xray/xray_init.cc
new file mode 100644 (file)
index 0000000..8c2a5d3
--- /dev/null
@@ -0,0 +1,66 @@
+//===-- xray_init.cc --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay initialisation logic.
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <fcntl.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+extern "C" {
+extern void __xray_init();
+extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak));
+extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak));
+}
+
+using namespace __xray;
+
+// We initialize some global variables that pertain to specific sections of XRay
+// data structures in the binary. The bounds of the instrumentation map come
+// from the __start_xray_instr_map and __stop_xray_instr_map symbols declared
+// above. A global atomic boolean indicates whether we've initialized properly.
+//
+std::atomic<bool> XRayInitialized{false};
+
+// This should always be updated before XRayInitialized is updated.
+std::atomic<__xray::XRaySledMap> XRayInstrMap{};
+
+// __xray_init() parses the runtime flags and then publishes the instrumentation
+// map delimited by the __start_xray_instr_map and __stop_xray_instr_map
+// symbols. If the patch_premain flag is set it also patches the sleds.
+void __xray_init() {
+  InitializeFlags();
+  if (__start_xray_instr_map == nullptr) {
+    Report("XRay instrumentation map missing. Not initializing XRay.\n");
+    return;
+  }
+
+  // Describe the sled table: a pointer to its first entry plus the number of
+  // XRaySledEntry objects between the start and stop symbols.
+  __xray::XRaySledMap Map{};
+  Map.Sleds = __start_xray_instr_map;
+  Map.Entries = __stop_xray_instr_map - __start_xray_instr_map;
+
+  // Publish the map before flipping the initialized flag, so that the map is
+  // always updated before XRayInitialized is.
+  XRayInstrMap.store(Map, std::memory_order_release);
+  XRayInitialized.store(true, std::memory_order_release);
+
+  if (!flags()->patch_premain)
+    return;
+  __xray_patch();
+}
+
+// Store a pointer to __xray_init in the .preinit_array section. The `used`
+// attribute keeps this otherwise unreferenced variable from being dropped.
+__attribute__((section(".preinit_array"),
+               used)) void (*__local_xray_preinit)(void) = __xray_init;
diff --git a/compiler-rt/lib/xray/xray_interface.cc b/compiler-rt/lib/xray/xray_interface.cc
new file mode 100644 (file)
index 0000000..997829e
--- /dev/null
@@ -0,0 +1,179 @@
+//===-- xray_interface.cc ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of the API functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "xray_interface_internal.h"
+#include <atomic>
+#include <cstdint>
+#include <cstdio>
+#include <errno.h>
+#include <limits>
+#include <sys/mman.h>
+
+namespace __xray {
+
+// This is the function to call when we encounter the entry or exit sleds.
+std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction{nullptr};
+
+} // namespace __xray
+
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.S files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+}
+
+extern std::atomic<bool> XRayInitialized;
+extern std::atomic<__xray::XRaySledMap> XRayInstrMap;
+
+// Installs `entry` as the function to call when an entry or exit sled is hit.
+// Returns 1 once the handler has been stored, or 0 if XRay is not initialized.
+int __xray_set_handler(void (*entry)(int32_t, XRayEntryType)) {
+  if (!XRayInitialized.load(std::memory_order_acquire))
+    return 0;
+
+  __xray::XRayPatchedFunction.store(entry, std::memory_order_release);
+  return 1;
+}
+
+std::atomic<bool> XRayPatching{false};
+
+XRayPatchingStatus __xray_patch() {
+  // FIXME: Make this happen asynchronously. For now just do this sequentially.
+  if (!XRayInitialized.load(std::memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+  static bool NotPatching = false;
+  if (!XRayPatching.compare_exchange_strong(NotPatching, true,
+                                            std::memory_order_acq_rel,
+                                            std::memory_order_acquire)) {
+    return XRayPatchingStatus::ONGOING; // Already patching.
+  }
+
+  // Step 1: Compute the function id, as a unique identifier per function in the
+  // instrumentation map.
+  __xray::XRaySledMap InstrMap = XRayInstrMap.load(std::memory_order_acquire);
+  if (InstrMap.Entries == 0)
+    return XRayPatchingStatus::NOT_INITIALIZED;
+
+  int32_t FuncId = 1;
+  static constexpr uint8_t CallOpCode = 0xe8;
+  static constexpr uint16_t MovR10Seq = 0xba41;
+  static constexpr uint8_t JmpOpCode = 0xe9;
+  uint64_t CurFun = 0;
+  for (std::size_t I = 0; I < InstrMap.Entries; I++) {
+    auto Sled = InstrMap.Sleds[I];
+    auto F = Sled.Function;
+    if (CurFun == 0)
+      CurFun = F;
+    if (F != CurFun) {
+      ++FuncId;
+      CurFun = F;
+    }
+
+    // While we're here, we should patch the nop sled. To do that we mprotect
+    // the page containing the function to be writeable.
+    void *PageAlignedAddr =
+        reinterpret_cast<void *>(Sled.Address & ~((2 << 16) - 1));
+    std::size_t MProtectLen =
+        (Sled.Address + 12) - reinterpret_cast<uint64_t>(PageAlignedAddr);
+    if (mprotect(PageAlignedAddr, MProtectLen,
+                 PROT_READ | PROT_WRITE | PROT_EXEC) == -1) {
+      printf("Failed mprotect: %d\n", errno);
+      return XRayPatchingStatus::FAILED;
+    }
+
+    static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
+    static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
+    if (Sled.Kind == XRayEntryType::ENTRY) {
+      // Here we do the dance of replacing the following sled:
+      //
+      // xray_sled_n:
+      //   jmp +9
+      //   <9 byte nop>
+      //
+      // With the following:
+      //
+      //   mov r10d, <function id>
+      //   call <relative 32bit offset to entry trampoline>
+      //
+      // We need to do this in the following order:
+      //
+      // 1. Put the function id first, 2 bytes from the start of the sled (just
+      // after the 2-byte jmp instruction).
+      // 2. Put the call opcode 6 bytes from the start of the sled.
+      // 3. Put the relative offset 7 bytes from the start of the sled.
+      // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+      // opcode and first operand.
+      //
+      // Prerequisite is to compute the relative offset to the
+      // __xray_FunctionEntry function's address.
+      int64_t TrampolineOffset =
+          reinterpret_cast<int64_t>(__xray_FunctionEntry) -
+          (static_cast<int64_t>(Sled.Address) + 11);
+      if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+        // FIXME: Print out an error here.
+        continue;
+      }
+      *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+      *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+      *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+      std::atomic_store_explicit(
+          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+          std::memory_order_release);
+    }
+
+    if (Sled.Kind == XRayEntryType::EXIT) {
+      // Here we do the dance of replacing the following sled:
+      //
+      // xray_sled_n:
+      //   ret
+      //   <10 byte nop>
+      //
+      // With the following:
+      //
+      //   mov r10d, <function id>
+      //   jmp <relative 32bit offset to exit trampoline>
+      //
+      // 1. Put the function id first, 2 bytes from the start of the sled (just
+      // after the 1-byte ret instruction).
+      // 2. Put the jmp opcode 6 bytes from the start of the sled.
+      // 3. Put the relative offset 7 bytes from the start of the sled.
+      // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+      // opcode and first operand.
+      //
+      // Prerequisite is to compute the relative offset fo the
+      // __xray_FunctionExit function's address.
+      int64_t TrampolineOffset =
+          reinterpret_cast<int64_t>(__xray_FunctionExit) -
+          (static_cast<int64_t>(Sled.Address) + 11);
+      if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+        // FIXME: Print out an error here.
+        continue;
+      }
+      *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+      *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
+      *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+      std::atomic_store_explicit(
+          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+          std::memory_order_release);
+    }
+
+    if (mprotect(PageAlignedAddr, MProtectLen, PROT_READ | PROT_EXEC) == -1) {
+      printf("Failed mprotect: %d\n", errno);
+      return XRayPatchingStatus::FAILED;
+    }
+  }
+  XRayPatching.store(false, std::memory_order_release);
+  return XRayPatchingStatus::NOTIFIED;
+}
diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h
new file mode 100644 (file)
index 0000000..6208c11
--- /dev/null
@@ -0,0 +1,42 @@
+//===-- xray_interface_internal.h -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of the API functions. See also include/xray/xray_interface.h.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_INTERFACE_INTERNAL_H
+#define XRAY_INTERFACE_INTERNAL_H
+
+#include "xray/xray_interface.h"
+#include <cstddef>
+#include <cstdint>
+
+extern "C" {
+
+// One record of the XRay instrumentation map, describing a single sled.
+struct XRaySledEntry {
+  // Address of the sled; the patching code writes instructions here.
+  uint64_t Address;
+  // Identifies the function the sled belongs to; the patching code bumps the
+  // function id whenever this value changes between consecutive entries.
+  uint64_t Function;
+  // Sled kind; compared against the XRayEntryType values when patching.
+  unsigned char Kind;
+  unsigned char AlwaysInstrument;
+  unsigned char Padding[14]; // Need 32 bytes
+};
+}
+
+namespace __xray {
+
+// A view over the instrumentation map: a pointer to the first sled entry and
+// the number of entries that follow it.
+struct XRaySledMap {
+  const XRaySledEntry *Sleds;
+  size_t Entries;
+};
+
+} // namespace __xray
+
+#endif
diff --git a/compiler-rt/lib/xray/xray_trampoline_x86.S b/compiler-rt/lib/xray/xray_trampoline_x86.S
new file mode 100644 (file)
index 0000000..8b8a108
--- /dev/null
@@ -0,0 +1,93 @@
+//===-- xray_trampoline_x86.S -----------------------------------*- ASM -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the X86-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+       .text
+       .file "xray_trampoline_x86.S"
+       .globl __xray_FunctionEntry
+       .align 16, 0x90
+       .type __xray_FunctionEntry,@function
+
+// Trampoline for function entry. Expects the function id in %r10d. Saves %rdi,
+// %rax, %rdx, %rsi, %rcx, %r8 and %r9, calls the handler stored in
+// __xray::XRayPatchedFunction (if one is set) with (function id, 0), then
+// restores the saved registers and returns.
+__xray_FunctionEntry:
+       .cfi_startproc
+       // Save caller provided registers before doing any actual work.
+       pushq   %rbp
+       .cfi_def_cfa_offset 16
+       subq    $72, %rsp
+       // Keep the CFA in sync with the 72 bytes just reserved (16 + 72).
+       .cfi_def_cfa_offset 88
+       movq    %rdi, 64(%rsp)
+       movq    %rax, 56(%rsp)
+       movq    %rdx, 48(%rsp)
+       movq    %rsi, 40(%rsp)
+       movq    %rcx, 32(%rsp)
+       movq    %r8, 24(%rsp)
+       movq    %r9, 16(%rsp)
+
+       // de-mangled, that's __xray::XRayPatchedFunction, and we're doing an acquire
+       // load (on x86 is a normal mov instruction).
+       movq    _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+       testq   %rax, %rax
+       je      .Ltmp0
+
+       // assume that %r10d has the function id.
+       movl    %r10d, %edi
+       // Second argument: 0, i.e. XRayEntryType ENTRY.
+       xor     %esi,%esi
+       callq   *%rax
+.Ltmp0:
+       // restore the registers
+       movq    64(%rsp), %rdi
+       movq    56(%rsp), %rax
+       movq    48(%rsp), %rdx
+       movq    40(%rsp), %rsi
+       movq    32(%rsp), %rcx
+       movq    24(%rsp), %r8
+       movq    16(%rsp), %r9
+       addq    $72, %rsp
+       .cfi_def_cfa_offset 16
+       popq    %rbp
+       .cfi_def_cfa_offset 8
+       retq
+.Ltmp1:
+       .size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry
+       .cfi_endproc
+
+       .globl __xray_FunctionExit
+       .align 16, 0x90
+       .type __xray_FunctionExit,@function
+// Trampoline for function exit. Expects the function id in %r10d. Saves %rax
+// and %rdx, calls the handler stored in __xray::XRayPatchedFunction (if one is
+// set) with (function id, 1), then restores %rax/%rdx and returns.
+__xray_FunctionExit:
+       .cfi_startproc
+       // Save the important registers first. Since we're assuming that this
+       // function is only jumped into, we only preserve the registers for
+       // returning.
+       // FIXME: Figure out whether this is sufficient.
+       pushq   %rbp
+       .cfi_def_cfa_offset 16
+       // We are jumped to in place of a `ret`, so on entry %rsp points at the
+       // return address (16n + 8). The pushq above brings it to a 16-byte
+       // boundary; reserve a multiple of 16 so that the stack is still aligned
+       // at the callq below.
+       subq    $32, %rsp
+       .cfi_def_cfa_offset 48
+       movq    %rax, 16(%rsp)
+       movq    %rdx, 8(%rsp)
+       movq    _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+       testq   %rax,%rax
+       je      .Ltmp2
+
+       movl    %r10d, %edi
+       // Second argument: 1, i.e. XRayEntryType EXIT.
+       movl    $1, %esi
+       callq   *%rax
+.Ltmp2:
+       // Restore the important registers.
+       movq    16(%rsp), %rax
+       movq    8(%rsp), %rdx
+       addq    $32, %rsp
+       .cfi_def_cfa_offset 16
+       popq    %rbp
+       .cfi_def_cfa_offset 8
+       retq
+.Ltmp3:
+       .size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
+       .cfi_endproc