From 3065014026db868bfcd76adef7247319ea96e0b2 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Fri, 22 Feb 2013 09:34:19 +0000 Subject: [PATCH] [msan] MSanDR: initial commit. MSanDR is a DynamoRio-based tool that handles uninstrumented libraries and dynamically generated code for MSan. llvm-svn: 175883 --- compiler-rt/lib/CMakeLists.txt | 1 + compiler-rt/lib/msandr/CMakeLists.txt | 26 ++ compiler-rt/lib/msandr/README.txt | 33 ++ compiler-rt/lib/msandr/msandr.cc | 670 ++++++++++++++++++++++++++++++++++ 4 files changed, 730 insertions(+) create mode 100644 compiler-rt/lib/msandr/CMakeLists.txt create mode 100644 compiler-rt/lib/msandr/README.txt create mode 100644 compiler-rt/lib/msandr/msandr.cc diff --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt index 83f2a60..2538a4d 100644 --- a/compiler-rt/lib/CMakeLists.txt +++ b/compiler-rt/lib/CMakeLists.txt @@ -14,6 +14,7 @@ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux" AND NOT ANDROID) # ThreadSanitizer and MemorySanitizer are supported on Linux only. add_subdirectory(tsan) add_subdirectory(msan) + add_subdirectory(msandr) endif() # FIXME: Add support for the profile library. 
diff --git a/compiler-rt/lib/msandr/CMakeLists.txt b/compiler-rt/lib/msandr/CMakeLists.txt
new file mode 100644
index 0000000..5a96a9d
--- /dev/null
+++ b/compiler-rt/lib/msandr/CMakeLists.txt
@@ -0,0 +1,26 @@
+
+if(DynamoRIO_DIR AND DrMemoryFramework_DIR)
+  set(CMAKE_COMPILER_IS_GNUCC 1)
+  find_package(DynamoRIO)
+  find_package(DrMemoryFramework)
+
+  set(arch "x86_64")
+  add_library(clang_rt.msandr-${arch} SHARED msandr.cc)
+  configure_DynamoRIO_client(clang_rt.msandr-${arch})
+
+  function(append_target_cflags tgt cflags)  # Append cflags to tgt's COMPILE_FLAGS.
+    get_property(old_cflags TARGET ${tgt} PROPERTY COMPILE_FLAGS)
+    set_property(TARGET ${tgt} PROPERTY COMPILE_FLAGS "${old_cflags} ${cflags}")
+  endfunction(append_target_cflags)
+
+  append_target_cflags(clang_rt.msandr-${arch} "-Wno-c++11-extensions")
+
+  use_DynamoRIO_extension(clang_rt.msandr-${arch} drutil)
+  use_DynamoRIO_extension(clang_rt.msandr-${arch} drmgr)
+  use_DynamoRIO_extension(clang_rt.msandr-${arch} drsyscall)
+
+  set_target_properties(clang_rt.msandr-${arch} PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
+  install(TARGETS clang_rt.msandr-${arch}
+    LIBRARY DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
+endif()
diff --git a/compiler-rt/lib/msandr/README.txt b/compiler-rt/lib/msandr/README.txt
new file mode 100644
index 0000000..b328910
--- /dev/null
+++ b/compiler-rt/lib/msandr/README.txt
@@ -0,0 +1,33 @@
+Experimental DynamoRIO-MSAN plugin (codename "MSanDR").
+Supports Linux/x86_64 only.
+
+Building:
+  1. First, download and build DynamoRIO:
+     (svn co https://dynamorio.googlecode.com/svn/trunk dr && \
+      cd dr && mkdir build && cd build && \
+      cmake -DDR_EXT_DRMGR_STATIC=ON -DDR_EXT_DRSYMS_STATIC=ON \
+            -DDR_EXT_DRUTIL_STATIC=ON -DDR_EXT_DRWRAP_STATIC=ON .. && \
+      make -j10 && make install)
+
+  2.
Download and build DrMemory (for DrSyscall extension)
+     (svn co http://drmemory.googlecode.com/svn/trunk/ drmemory && \
+      cd drmemory && mkdir build && cd build && \
+      cmake -DDynamoRIO_DIR=`pwd`/../../dr/exports/cmake .. && \
+      make -j10 && make install)
+
+  NOTE: The line above will build a shared DrSyscall library in a non-standard
+  location. This will require the use of LD_LIBRARY_PATH when running MSanDR.
+  To build a static DrSyscall library (and link it into MSanDR), add
+  -DDR_EXT_DRSYSCALL_STATIC=ON to the CMake invocation above, but
+  beware: DrSyscall is LGPL.
+
+  3. Now, build LLVM with two extra CMake flags:
+       -DDynamoRIO_DIR=<path_to_dynamorio>/exports/cmake
+       -DDrMemoryFramework_DIR=<path_to_drmemory>/exports64/drmf
+
+  This will build a lib/clang/$VERSION/lib/linux/libclang_rt.msandr-x86_64.so
+
+Running:
+  <path_to_dynamorio>/exports/bin64/drrun -c lib/clang/$VERSION/lib/linux/libclang_rt.msandr-x86_64.so -- test_binary
+
+MSan unit tests contain several tests for MSanDR (use MemorySanitizerDr.* gtest filter).
diff --git a/compiler-rt/lib/msandr/msandr.cc b/compiler-rt/lib/msandr/msandr.cc
new file mode 100644
index 0000000..235a1ed
--- /dev/null
+++ b/compiler-rt/lib/msandr/msandr.cc
@@ -0,0 +1,670 @@
+//===-- msandr.cc ---------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// DynamoRio client for MemorySanitizer.
+//
+// MemorySanitizer requires that all program code is instrumented. Any memory
+// store that can turn an uninitialized value into an initialized value must be
+// observed by the tool, otherwise we risk reporting a false UMR.
+//
+// This also includes any libraries that the program depends on.
+//
+// In the case when rebuilding all program dependencies with MemorySanitizer is
+// problematic, an experimental MSanDR tool (the code you are currently looking
+// at) can be used. It is a DynamoRio-based tool that uses dynamic
+// instrumentation to
+// * Unpoison all memory stores.
+// * Unpoison TLS slots used by MemorySanitizer to pass function arguments and
+//   return value shadow on anything that looks like a function call or a return
+//   from a function.
+//
+// This tool does not detect the use of uninitialized values in uninstrumented
+// libraries. It merely gets rid of false positives by marking all data that
+// passes through uninstrumented code as fully initialized.
+//===----------------------------------------------------------------------===//
+
+#include <dr_api.h>
+#include <drutil.h>
+#include <drmgr.h>
+#include <drsyscall.h>
+
+#include <sys/mman.h>
+
+#include <algorithm>
+#include <string>
+#include <set>
+#include <vector>
+#include <string.h>
+
+using std::string;
+
+// True iff every bit of `mask` is set in `var`.
+#define TESTALL(mask, var) (((mask) & (var)) == (mask))
+// True iff at least one bit of `mask` is set in `var`.
+#define TESTANY(mask, var) (((mask) & (var)) != 0)
+
+// Prints the failed condition with its location and aborts via dr_abort().
+#define CHECK_IMPL(condition, file, line)                                      \
+  do {                                                                         \
+    if (!(condition)) {                                                        \
+      dr_printf("Check failed: `%s`\nat %s:%d\n", #condition, file, line);     \
+      dr_abort();                                                              \
+    }                                                                          \
+  } while (0) // TODO: stacktrace
+
+#define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__)
+
+// Diagnostic output is compiled in but only printed when VERBOSITY is raised.
+#define VERBOSITY 0
+
+namespace {
+
+// Per-module bookkeeping record kept in g_module_list.
+class ModuleData {
+public:
+  ModuleData();
+  ModuleData(const module_data_t *info);
+  // Yes, we want default copy, assign, and dtor semantics.
+
+public:
+  // Module bounds, copied from module_data_t::start / ::end.
+  app_pc start_;
+  app_pc end_;
+  // Full path to the module.
+  string path_;
+  module_handle_t handle_;
+  // Whether basic blocks from this module get instrumented.
+  bool should_instrument_;
+  // Set once the first basic block from this module has been seen (only used
+  // for verbose output).
+  bool executed_;
+};
+
+string g_app_path;
+
+int msan_retval_tls_offset;
+int msan_param_tls_offset;
+
+// A vector of loaded modules sorted by module bounds. We lookup the current PC
+// in here from the bb event. This is better than an rb tree because the lookup
+// is faster and the bb event occurs far more than the module load event.
+std::vector<ModuleData> g_module_list;
+
+ModuleData::ModuleData()
+    : start_(NULL), end_(NULL), path_(""), handle_(NULL),
+      should_instrument_(false), executed_(false) {
+}
+
+ModuleData::ModuleData(const module_data_t *info)
+    : start_(info->start), end_(info->end), path_(info->full_path),
+      handle_(info->handle),
+      // We'll check the black/white lists later and adjust this.
+      should_instrument_(true), executed_(false) {
+}
+
+int(*__msan_get_retval_tls_offset)();
+int(*__msan_get_param_tls_offset)();
+
+// Resolves the two __msan_get_*_tls_offset callbacks exported by the
+// application binary and records the application path. Aborts (via CHECK) if
+// the application module or either symbol cannot be found.
+void InitializeMSanCallbacks() {
+  module_data_t *app = dr_lookup_module_by_name(dr_get_application_name());
+  if (!app) {
+    dr_printf("%s - oops, dr_lookup_module_by_name failed!\n",
+              dr_get_application_name());
+    CHECK(app);
+  }
+  g_app_path = app->full_path;
+
+  const char *callback_name = "__msan_get_retval_tls_offset";
+  __msan_get_retval_tls_offset =
+      (int(*)()) dr_get_proc_address(app->handle, callback_name);
+  if (__msan_get_retval_tls_offset == NULL) {
+    dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
+    CHECK(__msan_get_retval_tls_offset);
+  }
+
+  callback_name = "__msan_get_param_tls_offset";
+  __msan_get_param_tls_offset =
+      (int(*)()) dr_get_proc_address(app->handle, callback_name);
+  if (__msan_get_param_tls_offset == NULL) {
+    dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
+    CHECK(__msan_get_param_tls_offset);
+  }
+
+  // The module_data_t returned by dr_lookup_module_by_name is a heap copy
+  // owned by the caller; everything we need from it has been copied out.
+  dr_free_module_data(app);
+}
+
+#define MEM_TO_SHADOW(mem) ((mem) & ~0x400000000000ULL)
+
+// FIXME: Handle absolute addresses and PC-relative addresses.
+// FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have
+// a zero base anyway.
+bool OperandIsInteresting(opnd_t opnd) {
+  return (opnd_is_base_disp(opnd) && opnd_get_segment(opnd) != DR_SEG_FS &&
+          opnd_get_segment(opnd) != DR_SEG_GS);
+}
+
+// Returns true if |instr| writes memory through at least one "interesting"
+// destination operand (see OperandIsInteresting).
+bool WantToInstrument(instr_t *instr) {
+  // TODO: skip push instructions?
+  switch (instr_get_opcode(instr)) {
+    // FIXME: support the instructions excluded below:
+  case OP_rep_cmps:
+    // f3 a6 rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx
+    return false;
+  }
+
+  // Labels appear due to drutil_expand_rep_string()
+  if (instr_is_label(instr))
+    return false;
+
+  CHECK(instr_ok_to_mangle(instr) == true);
+
+  if (instr_writes_memory(instr)) {
+    for (int d = 0; d < instr_num_dsts(instr); d++) {
+      opnd_t op = instr_get_dst(instr, d);
+      if (OperandIsInteresting(op))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+#define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what);
+#define PREF(at, what) instrlist_meta_preinsert(bb, at, what);
+
+// Inserts, before |instr|, meta instructions that zero the shadow bytes
+// corresponding to the memory operand |op|. Registers and arithmetic flags
+// used by the inserted code are spilled and restored around it.
+void InstrumentMops(void *drcontext, instrlist_t *bb, instr_t *instr, opnd_t op,
+                    bool is_write) {
+  bool need_to_restore_eflags = false;
+  uint flags = instr_get_arith_flags(instr);
+  // TODO: do something smarter with flags and spills in general?
+  // For example, spill them only once for a sequence of instrumented
+  // instructions that don't change/read flags.
+
+  if (!TESTALL(EFLAGS_WRITE_6, flags) || TESTANY(EFLAGS_READ_6, flags)) {
+    if (VERBOSITY > 1)
+      dr_printf("Spilling eflags...\n");
+    need_to_restore_eflags = true;
+    // TODO: Maybe sometimes don't need to 'seto'.
+    // TODO: Maybe sometimes don't want to spill XAX here?
+    // TODO: No need to spill XAX here if XAX is not used in the BB.
+    dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+    dr_save_arith_flags_to_xax(drcontext, bb, instr);
+    dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
+    dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+  }
+
+#if 0
+  dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n",
+            opnd_is_memory_reference(op), opnd_is_base_disp(op),
+            opnd_is_base_disp(op) ? opnd_get_index(op) : -1,
+            opnd_is_far_memory_reference(op), opnd_is_reg_pointer_sized(op),
+            opnd_is_base_disp(op) ? opnd_get_disp(op) : -1);
+#endif
+
+  reg_id_t R1;
+  bool address_in_R1 = false;
+  if (opnd_is_base_disp(op) && opnd_get_index(op) == DR_REG_NULL &&
+      opnd_get_disp(op) == 0) {
+    // If this is a simple access with no offset or index, we can just use the
+    // base for R1.
+    address_in_R1 = true;
+    R1 = opnd_get_base(op);
+  } else {
+    // Otherwise, we need to compute the addr into R1.
+    // TODO: reuse some spare register? e.g. r15 on x64
+    // TODO: might be used as a non-mem-ref register?
+    R1 = DR_REG_XAX;
+  }
+  CHECK(reg_is_pointer_sized(R1)); // otherwise R2 may be wrong.
+
+  // Pick R2 that's not R1 or used by the operand. It's OK if the instr uses
+  // R2 elsewhere, since we'll restore it before instr.
+  reg_id_t GPR_TO_USE_FOR_R2[] = {
+    DR_REG_XAX, DR_REG_XBX, DR_REG_XCX, DR_REG_XDX
+    // Don't forget to update the +4 below if you add anything else!
+  };
+  std::set<reg_id_t> unused_registers(GPR_TO_USE_FOR_R2, GPR_TO_USE_FOR_R2 + 4);
+  unused_registers.erase(R1);
+  for (int j = 0; j < opnd_num_regs_used(op); j++) {
+    unused_registers.erase(opnd_get_reg_used(op, j));
+  }
+
+  CHECK(unused_registers.size() > 0);
+  reg_id_t R2 = *unused_registers.begin();
+  CHECK(R1 != R2);
+
+  // Save the current values of R1 and R2.
+  dr_save_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
+  // TODO: Something smarter than spilling a "fixed" register R2?
+  dr_save_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
+
+  if (!address_in_R1)
+    CHECK(drutil_insert_get_mem_addr(drcontext, bb, instr, op, R1, R2));
+  // R1 &= ~0x400000000000: same mapping as MEM_TO_SHADOW, done in app code.
+  PRE(instr, mov_imm(drcontext, opnd_create_reg(R2),
+                     OPND_CREATE_INT64(0xffffbfffffffffff)));
+  PRE(instr, and(drcontext, opnd_create_reg(R1), opnd_create_reg(R2)));
+  // There is no mov_st of a 64-bit immediate, so...
+  opnd_size_t op_size = opnd_get_size(op);
+  CHECK(op_size != OPSZ_NA);
+  uint access_size = opnd_size_in_bytes(op_size);
+  if (access_size <= 4) {
+    PRE(instr,
+        mov_st(drcontext, opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size),
+               opnd_create_immed_int((ptr_int_t) 0, op_size)));
+  } else {
+    // FIXME: tail?
+    for (uint ofs = 0; ofs < access_size; ofs += 4) {
+      PRE(instr,
+          mov_st(drcontext, OPND_CREATE_MEM32(R1, ofs), OPND_CREATE_INT32(0)));
+    }
+  }
+
+  // Restore the registers and flags.
+  dr_restore_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
+  dr_restore_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
+
+  if (need_to_restore_eflags) {
+    if (VERBOSITY > 1)
+      dr_printf("Restoring eflags\n");
+    // TODO: Check if it's reverse to the dr_restore_reg above and optimize.
+    dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+    dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
+    dr_restore_arith_flags_from_xax(drcontext, bb, instr);
+    dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+  }
+
+  // The original instruction is left untouched. The above instrumentation is just
+  // a prefix.
+}
+
+// Inserts, before |instr|, a store of zero to the TLS slot at
+// FS base + msan_retval_tls_offset.
+void InstrumentReturn(void *drcontext, instrlist_t *bb, instr_t *instr) {
+  dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+
+  // Clobbers nothing except xax.
+  bool res =
+      dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
+  CHECK(res);
+
+  // TODO: unpoison more bytes?
+  PRE(instr,
+      mov_st(drcontext, OPND_CREATE_MEM64(DR_REG_XAX, msan_retval_tls_offset),
+             OPND_CREATE_INT32(0)));
+
+  dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+
+  // The original instruction is left untouched. The above instrumentation is just
+  // a prefix.
+}
+
+// Inserts, before |instr|, stores of zero to the first six pointer-sized TLS
+// slots starting at FS base + msan_param_tls_offset.
+void InstrumentIndirectBranch(void *drcontext, instrlist_t *bb,
+                              instr_t *instr) {
+  dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+
+  // Clobbers nothing except xax.
+  bool res =
+      dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
+  CHECK(res);
+
+  // TODO: unpoison more bytes?
+  for (int i = 0; i < 6; ++i) {
+    PRE(instr,
+        mov_st(drcontext, OPND_CREATE_MEMPTR(DR_REG_XAX, msan_param_tls_offset +
+                                             i * sizeof(void *)),
+               OPND_CREATE_INT32(0)));
+  }
+
+  dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+
+  // The original instruction is left untouched. The above instrumentation is just
+  // a prefix.
+}
+
+// For use with binary search. Modules shouldn't overlap, so we shouldn't have
+// to look at end_. If that can happen, we won't support such an application.
+bool ModuleDataCompareStart(const ModuleData &left, const ModuleData &right) {
+  return left.start_ < right.start_;
+}
+
+// Look up the module containing PC. Should be relatively fast, as its called
+// for each bb instrumentation. Returns NULL if PC is not inside any module in
+// g_module_list.
+ModuleData *LookupModuleByPC(app_pc pc) {
+  ModuleData fake_mod_data;
+  fake_mod_data.start_ = pc;
+  std::vector<ModuleData>::iterator it =
+      lower_bound(g_module_list.begin(), g_module_list.end(), fake_mod_data,
+                  ModuleDataCompareStart);
+  if (it == g_module_list.end() || pc < it->start_) {
+    // lower_bound returned the first module starting at or after pc, so the
+    // only candidate that can contain pc is the module before it. If there is
+    // no such module (empty list, or pc lies below the lowest module) pc is
+    // not in a module; stepping back from begin() would be undefined.
+    if (it == g_module_list.begin())
+      return NULL;
+    --it;
+  }
+  CHECK(it->start_ <= pc);
+  if (pc >= it->end_) {
+    // We're past the end of this module. We shouldn't be in the next module,
+    // or lower_bound lied to us.
+    ++it;
+    CHECK(it == g_module_list.end() || pc < it->start_);
+    return NULL;
+  }
+
+  // OK, we found the module.
+  return &*it;
+}
+
+bool ShouldInstrumentNonModuleCode() { return true; }
+
+// A module is instrumented here only if it does not export
+// __msan_track_origins.
+bool ShouldInstrumentModule(ModuleData *mod_data) {
+  // TODO(rnk): Flags for blacklist would get wired in here.
+  generic_func_t p =
+      dr_get_proc_address(mod_data->handle_, "__msan_track_origins");
+  return !p;
+}
+
+// Decides whether code at |pc| should be instrumented. If |pmod_data| is not
+// NULL it receives the containing module, or NULL for non-module code.
+bool ShouldInstrumentPc(app_pc pc, ModuleData **pmod_data) {
+  ModuleData *mod_data = LookupModuleByPC(pc);
+  if (pmod_data)
+    *pmod_data = mod_data;
+  if (mod_data != NULL) {
+    // This module is on a blacklist.
+    if (!mod_data->should_instrument_) {
+      return false;
+    }
+  } else if (!ShouldInstrumentNonModuleCode()) {
+    return false;
+  }
+  return true;
+}
+
+// TODO(rnk): Make sure we instrument after __msan_init.
+dr_emit_flags_t
+event_basic_block_app2app(void *drcontext, void *tag, instrlist_t *bb,
+                          bool for_trace, bool translating) {
+  app_pc pc = dr_fragment_app_pc(tag);
+
+  if (ShouldInstrumentPc(pc, NULL))
+    CHECK(drutil_expand_rep_string(drcontext, bb));
+
+  return DR_EMIT_PERSISTABLE;
+}
+
+// Basic-block event: instruments returns, indirect branches and memory writes
+// of every basic block that ShouldInstrumentPc accepts.
+dr_emit_flags_t event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
+                                  bool for_trace, bool translating) {
+  app_pc pc = dr_fragment_app_pc(tag);
+  ModuleData *mod_data;
+
+  if (!ShouldInstrumentPc(pc, &mod_data))
+    return DR_EMIT_PERSISTABLE;
+
+  if (VERBOSITY > 1)
+    dr_printf("============================================================\n");
+  if (VERBOSITY > 0) {
+    string mod_path = (mod_data ? mod_data->path_ : "");
+    if (mod_data && !mod_data->executed_) {
+      mod_data->executed_ = true; // Nevermind this race.
+      dr_printf("Executing from new module: %s\n", mod_path.c_str());
+    }
+    dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc,
+              mod_path.c_str(), translating ? "true" : "false");
+    if (mod_data) {
+      // Match standard sanitizer trace format for free symbols.
+      // #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
+      dr_printf(" #0 %p (%s+%p)\n", pc, mod_data->path_.c_str(),
+                pc - mod_data->start_);
+    }
+  }
+  if (VERBOSITY > 1) {
+    instrlist_disassemble(drcontext, pc, bb, STDOUT);
+    instr_t *instr;
+    for (instr = instrlist_first(bb); instr; instr = instr_get_next(instr)) {
+      dr_printf("opcode: %d\n", instr_get_opcode(instr));
+    }
+  }
+
+  for (instr_t *i = instrlist_first(bb); i != NULL; i = instr_get_next(i)) {
+    int opcode = instr_get_opcode(i);
+    if (opcode == OP_ret || opcode == OP_ret_far) {
+      InstrumentReturn(drcontext, bb, i);
+      continue;
+    }
+
+    // These instructions hopefully cover all cases where control is transferred
+    // to a function in a different module (we only care about calls into
+    // compiler-instrumented modules).
+    // * call_ind is used for normal indirect calls.
+    // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT
+    //   stub includes a jump to an address from GOT).
+    if (opcode == OP_call_ind || opcode == OP_call_far_ind ||
+        opcode == OP_jmp_ind || opcode == OP_jmp_far_ind) {
+      InstrumentIndirectBranch(drcontext, bb, i);
+      continue;
+    }
+
+    if (!WantToInstrument(i))
+      continue;
+
+    if (VERBOSITY > 1) {
+      app_pc orig_pc = dr_fragment_app_pc(tag);
+      uint flags = instr_get_arith_flags(i);
+      dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n",
+                instr_get_app_pc(i) - orig_pc, instr_get_opcode(i), flags);
+    }
+
+    if (instr_writes_memory(i)) {
+      // Instrument memory writes
+      // bool instrumented_anything = false;
+      for (int d = 0; d < instr_num_dsts(i); d++) {
+        opnd_t op = instr_get_dst(i, d);
+        if (!OperandIsInteresting(op))
+          continue;
+
+        // CHECK(!instrumented_anything);
+        // instrumented_anything = true;
+        InstrumentMops(drcontext, bb, i, op, true);
+        break; // only instrumenting the first dst
+      }
+    }
+  }
+
+// TODO: optimize away redundant restore-spill pairs?
+
+  if (VERBOSITY > 1) {
+    pc = dr_fragment_app_pc(tag);
+    dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX ")\n", pc);
+    instrlist_disassemble(drcontext, pc, bb, STDOUT);
+  }
+  return DR_EMIT_PERSISTABLE;
+}
+
+void event_module_load(void *drcontext, const module_data_t *info,
+                       bool loaded) {
+  // Insert the module into the list while maintaining the ordering.
+  ModuleData mod_data(info);
+  std::vector<ModuleData>::iterator it =
+      upper_bound(g_module_list.begin(), g_module_list.end(), mod_data,
+                  ModuleDataCompareStart);
+  it = g_module_list.insert(it, mod_data);
+  // Check if we should instrument this module.
+  it->should_instrument_ = ShouldInstrumentModule(&*it);
+  dr_module_set_should_instrument(info->handle, it->should_instrument_);
+
+  if (VERBOSITY > 0)
+    dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n",
+              info->full_path, info->start, info->end,
+              it->should_instrument_ ? "on" : "off");
+}
+
+void event_module_unload(void *drcontext, const module_data_t *info) {
+  if (VERBOSITY > 0)
+    dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info->full_path,
+              info->start, info->end);
+
+  // Remove the module from the list.
+  ModuleData mod_data(info);
+  std::vector<ModuleData>::iterator it =
+      lower_bound(g_module_list.begin(), g_module_list.end(), mod_data,
+                  ModuleDataCompareStart);
+  // It's a bug if we didn't actually find the module.
+  CHECK(it != g_module_list.end() && it->start_ == mod_data.start_ &&
+        it->end_ == mod_data.end_ && it->path_ == mod_data.path_);
+  g_module_list.erase(it);
+}
+
+void event_exit() {
+  if (VERBOSITY > 0)
+    dr_printf("==DRMSAN== DONE\n");
+}
+
+bool event_filter_syscall(void *drcontext, int sysnum) {
+  // FIXME: only intercept syscalls with memory effects.
+  return true; /* intercept everything */
+}
+
+// drsys_iterate_memargs callback: for post-syscall OUT arguments, zeroes the
+// shadow of the memory range the syscall wrote. |user_data| is only read on
+// the oversized-argument path, where it must be the current drsys_syscall_t.
+bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
+  CHECK(arg->valid);
+
+  if (arg->pre)
+    return true;
+  if (arg->mode != DRSYS_PARAM_OUT)
+    return true;
+
+  size_t sz = arg->size;
+
+  if (sz > 0xFFFFFFFF) {
+    drmf_status_t res;
+    drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
+    const char *name;
+    res = drsys_syscall_name(syscall, &name);
+    CHECK(res == DRMF_SUCCESS);
+
+    dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!"
+              " Clipping to %llu.\n",
+              name, arg->ordinal, (unsigned long long) sz,
+              (unsigned long long)(sz & 0xFFFFFFFF));
+    // Actually apply the clipping announced above; previously the memset
+    // below still used the full, implausibly large size.
+    sz &= 0xFFFFFFFF;
+  }
+
+  void *p = (void *)MEM_TO_SHADOW((ptr_uint_t) arg->start_addr);
+  memset(p, 0, sz);
+
+  return true; /* keep going */
+}
+
+bool event_pre_syscall(void *drcontext, int sysnum) {
+  drsys_syscall_t *syscall;
+  drsys_sysnum_t sysnum_full;
+  bool known;
+  drsys_param_type_t ret_type;
+  drmf_status_t res;
+  const char *name;
+
+  res = drsys_cur_syscall(drcontext, &syscall);
+  CHECK(res == DRMF_SUCCESS);
+
+  res = drsys_syscall_number(syscall, &sysnum_full);
+  CHECK(res == DRMF_SUCCESS);
+  CHECK(sysnum == sysnum_full.number);
+
+  res = drsys_syscall_is_known(syscall, &known);
+  CHECK(res == DRMF_SUCCESS);
+
+  res = drsys_syscall_name(syscall, &name);
+  CHECK(res == DRMF_SUCCESS);
+
+  res = drsys_syscall_return_type(syscall, &ret_type);
+  CHECK(res == DRMF_SUCCESS);
+  CHECK(ret_type != DRSYS_TYPE_INVALID);
+  CHECK(!known || ret_type != DRSYS_TYPE_UNKNOWN);
+
+  // The callback returns early for pre-syscall arguments, so it never reads
+  // the NULL user_data passed here.
+  res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, NULL);
+  CHECK(res == DRMF_SUCCESS);
+
+  return true;
+}
+
+void event_post_syscall(void *drcontext, int sysnum) {
+  drsys_syscall_t *syscall;
+  drsys_sysnum_t sysnum_full;
+  bool success = false;
+  drmf_status_t res;
+
+  res = drsys_cur_syscall(drcontext, &syscall);
+  CHECK(res == DRMF_SUCCESS);
+
+  res = drsys_syscall_number(syscall, &sysnum_full);
+  CHECK(res == DRMF_SUCCESS);
+  CHECK(sysnum == sysnum_full.number);
+
+  res = drsys_syscall_succeeded(syscall, dr_syscall_get_result(drcontext),
+                                &success);
+  CHECK(res == DRMF_SUCCESS);
+
+  if (success) {
+    res =
+        drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);
+    CHECK(res == DRMF_SUCCESS);
+  }
+}
+
+} // namespace
+
+DR_EXPORT void dr_init(client_id_t id) {
+  drmf_status_t res;
+
+  drmgr_init();
+  drutil_init();
+
+  string app_name = dr_get_application_name();
+  // This blacklist will still run these apps through DR's code cache. On the
+  // other hand, we are able to follow children of these apps.
+  // FIXME: Once DR has detach, we could just detach here. Alternatively,
+  // if DR had a fork or exec hook to let us decide there, that would be nice.
+  // FIXME: make the blacklist cmd-adjustable.
+  if (app_name == "python" || app_name == "python2.7" || app_name == "bash" ||
+      app_name == "sh" || app_name == "true" || app_name == "exit" ||
+      app_name == "yes" || app_name == "echo")
+    return;
+
+  drsys_options_t ops;
+  memset(&ops, 0, sizeof(ops));
+  ops.struct_size = sizeof(ops);
+  ops.analyze_unknown_syscalls = false;
+
+  res = drsys_init(id, &ops);
+  CHECK(res == DRMF_SUCCESS);
+
+  dr_register_filter_syscall_event(event_filter_syscall);
+  drmgr_register_pre_syscall_event(event_pre_syscall);
+  drmgr_register_post_syscall_event(event_post_syscall);
+  res = drsys_filter_all_syscalls();
+  CHECK(res == DRMF_SUCCESS);
+
+  InitializeMSanCallbacks();
+
+  // FIXME: the shadow is initialized earlier when DR calls one of our wrapper
+  // functions. This may change one day.
+  // TODO: make this more robust.
+
+  void *drcontext = dr_get_current_drcontext();
+
+  dr_switch_to_app_state(drcontext);
+  msan_retval_tls_offset = __msan_get_retval_tls_offset();
+  msan_param_tls_offset = __msan_get_param_tls_offset();
+  dr_switch_to_dr_state(drcontext);
+  if (VERBOSITY > 0) {
+    dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset);
+    dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset);
+  }
+
+  // Standard DR events.
+  dr_register_exit_event(event_exit);
+
+  drmgr_priority_t priority = {
+    sizeof(priority), /* size of struct */
+    "msandr",         /* name of our operation */
+    NULL,             /* optional name of operation we should precede */
+    NULL,             /* optional name of operation we should follow */
+    0
+  };                  /* numeric priority */
+
+  drmgr_register_bb_app2app_event(event_basic_block_app2app, &priority);
+  drmgr_register_bb_instru2instru_event(event_basic_block, &priority);
+  drmgr_register_module_load_event(event_module_load);
+  drmgr_register_module_unload_event(event_module_unload);
+  if (VERBOSITY > 0)
+    dr_printf("==MSANDR== Starting!\n");
+}
-- 
2.7.4