From: Andrew Browne Date: Sat, 10 Dec 2022 01:47:54 +0000 (-0800) Subject: [DFSan] Add callback that allows to track which function tainted data reaches. X-Git-Tag: upstream/17.0.6~24280 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5bb06c7cce6bdcffb3ced29fa733c0dbb1b63c41;p=platform%2Fupstream%2Fllvm.git [DFSan] Add callback that allows to track which function tainted data reaches. Authored-by: Christopher Liebchen Co-authored-by: Andrew Browne Reviewed By: browneee Differential Revision: https://reviews.llvm.org/D139543 --- diff --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h index 8e581a6..519bfff 100644 --- a/compiler-rt/include/sanitizer/dfsan_interface.h +++ b/compiler-rt/include/sanitizer/dfsan_interface.h @@ -31,6 +31,14 @@ typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count); typedef void (*dfsan_conditional_callback_t)(dfsan_label label, dfsan_origin origin); +/// Signature of the callback argument to dfsan_set_reaches_function_callback(). +/// \c file, \c line and \c function identify the code location the data reached. +typedef void (*dfsan_reaches_function_callback_t)(dfsan_label label, + dfsan_origin origin, + const char *file, + unsigned int line, + const char *function); + /// Computes the union of \c l1 and \c l2, resulting in a union label. dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2); @@ -91,6 +99,18 @@ void dfsan_set_conditional_callback(dfsan_conditional_callback_t callback); /// This function returns all label bits seen in signal handler conditions. dfsan_label dfsan_get_labels_in_signal_conditional(); +/// Sets a callback to be invoked when tainted data reaches a function. +/// This could occur at function entry, at a load instruction, or after a call. +/// These callbacks will only be added if -dfsan-reaches-function-callbacks=1. 
+void dfsan_set_reaches_function_callback( + dfsan_reaches_function_callback_t callback); + +/// Making callbacks that handle signals well is tricky, so when +/// -dfsan-reaches-function-callbacks=true, functions reached in signal +/// handlers will add the labels they see into a global (bitwise-or together). +/// This function returns all label bits seen during signal handlers. +dfsan_label dfsan_get_labels_in_signal_reaches_function(); + /// Interceptor hooks. /// Whenever a dfsan's custom function is called the corresponding /// hook is called it non-zero. The hooks should be defined by the user. diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp index 0a6f319..faf5a66 100644 --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -718,6 +718,67 @@ dfsan_get_labels_in_signal_conditional() { return __dfsan::labels_in_signal_conditional; } +namespace __dfsan { + +typedef void (*dfsan_reaches_function_callback_t)(dfsan_label label, + dfsan_origin origin, + const char *file, + unsigned int line, + const char *function); +static dfsan_reaches_function_callback_t reaches_function_callback = nullptr; +static dfsan_label labels_in_signal_reaches_function = 0; + +static void ReachesFunctionCallback(dfsan_label label, dfsan_origin origin, + const char *file, unsigned int line, + const char *function) { + if (label == 0) { + return; + } + if (reaches_function_callback == nullptr) { + return; + } + + // This initial ReachesFunctionCallback handler needs to be in here in dfsan + // runtime (rather than being an entirely user implemented hook) so that it + // has access to dfsan thread information. + DFsanThread *t = GetCurrentThread(); + // A callback operation which does useful work (like record the flow) will + // likely take too long to execute in a signal handler. + if (t && t->InSignalHandler()) { + // Record set of labels used in signal handler for completeness. 
+ labels_in_signal_reaches_function |= label; + return; + } + + reaches_function_callback(label, origin, file, line, function); +} + +} // namespace __dfsan + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void +__dfsan_reaches_function_callback_origin(dfsan_label label, dfsan_origin origin, + const char *file, unsigned int line, + const char *function) { + __dfsan::ReachesFunctionCallback(label, origin, file, line, function); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void +__dfsan_reaches_function_callback(dfsan_label label, const char *file, + unsigned int line, const char *function) { + __dfsan::ReachesFunctionCallback(label, 0, file, line, function); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void +dfsan_set_reaches_function_callback( + __dfsan::dfsan_reaches_function_callback_t callback) { + __dfsan::reaches_function_callback = callback; +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label +dfsan_get_labels_in_signal_reaches_function() { + return __dfsan::labels_in_signal_reaches_function; +} + class Decorator : public __sanitizer::SanitizerCommonDecorator { public: Decorator() : SanitizerCommonDecorator() {} @@ -1031,6 +1092,7 @@ extern "C" void dfsan_flush() { } } __dfsan::labels_in_signal_conditional = 0; + __dfsan::labels_in_signal_reaches_function = 0; } // TODO: CheckMemoryLayoutSanity is based on msan. 
diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt index e8fcd83..ff8a37f 100644 --- a/compiler-rt/lib/dfsan/done_abilist.txt +++ b/compiler-rt/lib/dfsan/done_abilist.txt @@ -50,6 +50,12 @@ fun:dfsan_set_conditional_callback=uninstrumented fun:dfsan_set_conditional_callback=discard fun:dfsan_get_labels_in_signal_conditional=uninstrumented fun:dfsan_get_labels_in_signal_conditional=discard +fun:dfsan_set_reaches_function_callback=uninstrumented +fun:dfsan_set_reaches_function_callback=discard +fun:dfsan_get_labels_in_signal_reaches_function=uninstrumented +fun:dfsan_get_labels_in_signal_reaches_function=discard +fun:dfsan_reaches_function_callback=uninstrumented +fun:dfsan_reaches_function_callback=discard ############################################################################### # glibc diff --git a/compiler-rt/test/dfsan/Inputs/flags_abilist.txt b/compiler-rt/test/dfsan/Inputs/flags_abilist.txt index 6245a41..f0dff9b 100644 --- a/compiler-rt/test/dfsan/Inputs/flags_abilist.txt +++ b/compiler-rt/test/dfsan/Inputs/flags_abilist.txt @@ -13,3 +13,9 @@ fun:my_dfsan_conditional_callback=discard fun:dfsan_set_conditional_callback=uninstrumented fun:dfsan_set_conditional_callback=discard + +fun:my_dfsan_reaches_function_callback=uninstrumented +fun:my_dfsan_reaches_function_callback=discard + +fun:dfsan_set_reaches_function_callback=uninstrumented +fun:dfsan_set_reaches_function_callback=discard diff --git a/compiler-rt/test/dfsan/reaches_function.c b/compiler-rt/test/dfsan/reaches_function.c new file mode 100644 index 0000000..46a2b7b --- /dev/null +++ b/compiler-rt/test/dfsan/reaches_function.c @@ -0,0 +1,67 @@ +// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -c %s -o %t-callbacks.o +// RUN: %clang_dfsan -gmlt -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-reaches-function-callbacks=1 %s %t-callbacks.o -o %t +// RUN: %run %t 2>&1 | FileCheck %s + +// RUN: %clang_dfsan 
-fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -DORIGIN_TRACKING -c %s -o %t-callbacks.o +// RUN: %clang_dfsan -gmlt -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-reaches-function-callbacks=1 -mllvm -dfsan-track-origins=2 %s %t-callbacks.o -o %t +// RUN: %run %t 2>&1 | FileCheck --check-prefix=CHECK-ORIGIN-TRACKING %s + +// REQUIRES: x86_64-target-arch + +// Tests that callbacks are inserted for reached functions when +// -dfsan-reaches-function-callbacks is specified. + +#include +#include +#include +#include + +#ifdef CALLBACKS +// Compile this code without DFSan to avoid recursive instrumentation. + +void my_dfsan_reaches_function_callback(dfsan_label label, dfsan_origin origin, + const char *file, unsigned int line, + const char *function) { +#ifdef ORIGIN_TRACKING + dfsan_print_origin_id_trace(origin); +#else + printf("%s:%d %s\n", file, line, function); +#endif +} + +#else + +__attribute__((noinline)) uint64_t add(uint64_t *a, uint64_t *b) { + + return *a + *b; + // CHECK: {{.*}}compiler-rt/test/dfsan/reaches_function.c:[[# @LINE - 1]] add.dfsan + // CHECK-ORIGIN-TRACKING: Origin value: 0x10000002, Taint value was stored to memory at + // CHECK-ORIGIN-TRACKING: #0 {{.*}} in add.dfsan {{.*}}compiler-rt/test/dfsan/reaches_function.c:[[# @LINE - 3]]:{{.*}} + // CHECK-ORIGIN-TRACKING: Origin value: 0x1, Taint value was created at + // CHECK-ORIGIN-TRACKING: #0 {{.*}} in main {{.*}}compiler-rt/test/dfsan/reaches_function.c:{{.*}} +} + +extern void my_dfsan_reaches_function_callback(dfsan_label label, + dfsan_origin origin, + const char *file, + unsigned int line, + const char *function); + +int main(int argc, char *argv[]) { + + dfsan_set_reaches_function_callback(my_dfsan_reaches_function_callback); + + uint64_t a = 0; + uint64_t b = 0; + + dfsan_set_label(8, &a, sizeof(a)); + uint64_t c = add(&a, &b); + // CHECK: {{.*}}compiler-rt/test/dfsan/reaches_function.c:[[# @LINE - 1]] main + // CHECK-ORIGIN-TRACKING: Origin value: 0x10000002, Taint 
value was stored to memory at + // CHECK-ORIGIN-TRACKING: #0 {{.*}} in add.dfsan {{.*}}compiler-rt/test/dfsan/reaches_function.c:{{.*}} + // CHECK-ORIGIN-TRACKING: Origin value: 0x1, Taint value was created at + // CHECK-ORIGIN-TRACKING: #0 {{.*}} in main {{.*}}compiler-rt/test/dfsan/reaches_function.c:[[# @LINE - 6]]:{{.*}} + return c; +} + +#endif // #ifdef CALLBACKS diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index fe2a502..b62f150 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -223,6 +223,14 @@ static cl::opt ClConditionalCallbacks( cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden, cl::init(false)); +// Experimental feature that inserts callbacks for data reaching a function, +// via function arguments, loads, or call results. +// This must be true for dfsan_set_reaches_function_callback() to have effect. +static cl::opt ClReachesFunctionCallbacks( + "dfsan-reaches-function-callbacks", + cl::desc("Insert calls to callback functions on data reaching a function."), + cl::Hidden, cl::init(false)); + // Controls whether the pass tracks the control flow of select instructions. 
static cl::opt ClTrackSelectControlFlow( "dfsan-track-select-control-flow", @@ -446,6 +454,8 @@ class DataFlowSanitizer { FunctionType *DFSanVarargWrapperFnTy; FunctionType *DFSanConditionalCallbackFnTy; FunctionType *DFSanConditionalCallbackOriginFnTy; + FunctionType *DFSanReachesFunctionCallbackFnTy; + FunctionType *DFSanReachesFunctionCallbackOriginFnTy; FunctionType *DFSanCmpCallbackFnTy; FunctionType *DFSanLoadStoreCallbackFnTy; FunctionType *DFSanMemTransferCallbackFnTy; @@ -467,6 +477,8 @@ class DataFlowSanitizer { FunctionCallee DFSanMemTransferCallbackFn; FunctionCallee DFSanConditionalCallbackFn; FunctionCallee DFSanConditionalCallbackOriginFn; + FunctionCallee DFSanReachesFunctionCallbackFn; + FunctionCallee DFSanReachesFunctionCallbackOriginFn; FunctionCallee DFSanCmpCallbackFn; FunctionCallee DFSanChainOriginFn; FunctionCallee DFSanChainOriginIfTaintedFn; @@ -673,6 +685,11 @@ struct DFSanFunction { // branch instruction using the given conditional expression. void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition); + // If ClReachesFunctionCallbacks is enabled, insert a callback for each + // argument, load instruction, and call result. 
+ void addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB, Instruction &I, + Value *Data); + bool isLookupTableConstant(Value *P); private: @@ -1025,6 +1042,45 @@ void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I, } } +void DFSanFunction::addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB, + Instruction &I, + Value *Data) { + if (!ClReachesFunctionCallbacks) { + return; + } + const DebugLoc &dbgloc = I.getDebugLoc(); + Value *DataShadow = collapseToPrimitiveShadow(getShadow(Data), IRB); + ConstantInt *CILine; + llvm::Value *FilePathPtr; + + if (dbgloc.get() == nullptr) { + CILine = llvm::ConstantInt::get(I.getContext(), llvm::APInt(32, 0, false)); + FilePathPtr = IRB.CreateGlobalStringPtr( + I.getFunction()->getParent()->getSourceFileName()); + } else { + CILine = llvm::ConstantInt::get(I.getContext(), + llvm::APInt(32, dbgloc.getLine(), false)); + FilePathPtr = + IRB.CreateGlobalStringPtr(dbgloc->getFilename()); + } + + llvm::Value *FunctionNamePtr = + IRB.CreateGlobalStringPtr(I.getFunction()->getName()); + + CallInst *CB; + std::vector args; + + if (DFS.shouldTrackOrigins()) { + Value *DataOrigin = getOrigin(Data); + args = { DataShadow, DataOrigin, FilePathPtr, CILine, FunctionNamePtr }; + CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackOriginFn, args); + } else { + args = { DataShadow, FilePathPtr, CILine, FunctionNamePtr }; + CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackFn, args); + } + CB->setDebugLoc(dbgloc); +} + Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) { if (!OrigTy->isSized()) return PrimitiveShadowTy; @@ -1097,6 +1153,16 @@ bool DataFlowSanitizer::initializeModule(Module &M) { DFSanConditionalCallbackOriginFnTy = FunctionType::get( Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs, /*isVarArg=*/false); + Type *DFSanReachesFunctionCallbackArgs[4] = {PrimitiveShadowTy, Int8Ptr, + OriginTy, Int8Ptr}; + DFSanReachesFunctionCallbackFnTy = + FunctionType::get(Type::getVoidTy(*Ctx), 
DFSanReachesFunctionCallbackArgs, + /*isVarArg=*/false); + Type *DFSanReachesFunctionCallbackOriginArgs[5] = { + PrimitiveShadowTy, OriginTy, Int8Ptr, OriginTy, Int8Ptr}; + DFSanReachesFunctionCallbackOriginFnTy = FunctionType::get( + Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackOriginArgs, + /*isVarArg=*/false); DFSanCmpCallbackFnTy = FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy, /*isVarArg=*/false); @@ -1325,6 +1391,10 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) { DFSanRuntimeFunctions.insert( DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( + DFSanReachesFunctionCallbackFn.getCallee()->stripPointerCasts()); + DFSanRuntimeFunctions.insert( + DFSanReachesFunctionCallbackOriginFn.getCallee()->stripPointerCasts()); + DFSanRuntimeFunctions.insert( DFSanCmpCallbackFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( DFSanChainOriginFn.getCallee()->stripPointerCasts()); @@ -1357,6 +1427,11 @@ void DataFlowSanitizer::initializeCallbackFunctions(Module &M) { DFSanConditionalCallbackOriginFn = Mod->getOrInsertFunction("__dfsan_conditional_callback_origin", DFSanConditionalCallbackOriginFnTy); + DFSanReachesFunctionCallbackFn = Mod->getOrInsertFunction( + "__dfsan_reaches_function_callback", DFSanReachesFunctionCallbackFnTy); + DFSanReachesFunctionCallbackOriginFn = + Mod->getOrInsertFunction("__dfsan_reaches_function_callback_origin", + DFSanReachesFunctionCallbackOriginFnTy); } void DataFlowSanitizer::injectMetadataGlobals(Module &M) { @@ -1585,6 +1660,31 @@ bool DataFlowSanitizer::runImpl( DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F), FnsWithForceZeroLabel.count(F), GetTLI(*F)); + if (ClReachesFunctionCallbacks) { + // Add callback for arguments reaching this function. 
+ for (auto &FArg : F->args()) { + Instruction *Next = &F->getEntryBlock().front(); + Value *FArgShadow = DFSF.getShadow(&FArg); + if (isZeroShadow(FArgShadow)) + continue; + if (Instruction *FArgShadowInst = dyn_cast(FArgShadow)) { + Next = FArgShadowInst->getNextNode(); + } + if (shouldTrackOrigins()) { + if (Instruction *Origin = + dyn_cast(DFSF.getOrigin(&FArg))) { + // Ensure IRB insertion point is after loads for shadow and origin. + Instruction *OriginNext = Origin->getNextNode(); + if (Next->comesBefore(OriginNext)) { + Next = OriginNext; + } + } + } + IRBuilder<> IRB(Next); + DFSF.addReachesFunctionCallbacksIfEnabled(IRB, *Next, &FArg); + } + } + // DFSanVisitor may create new basic blocks, which confuses df_iterator. // Build a copy of the list before iterating over it. SmallVector BBList(depth_first(&F->getEntryBlock())); @@ -2267,6 +2367,7 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) { if (LI.isAtomic()) LI.setOrdering(addAcquireOrdering(LI.getOrdering())); + Instruction *AfterLi = LI.getNextNode(); Instruction *Pos = LI.isAtomic() ? 
LI.getNextNode() : &LI; std::vector Shadows; std::vector Origins; @@ -2304,6 +2405,9 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) { Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr); IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8}); } + + IRBuilder<> IRB(AfterLi); + DFSF.addReachesFunctionCallbacksIfEnabled(IRB, LI, &LI); } Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin, @@ -3303,6 +3407,8 @@ void DFSanVisitor::visitCallBase(CallBase &CB) { DFSF.SkipInsts.insert(LI); DFSF.setOrigin(&CB, LI); } + + DFSF.addReachesFunctionCallbacksIfEnabled(NextIRB, CB, &CB); } } diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/reaches_function.ll b/llvm/test/Instrumentation/DataFlowSanitizer/reaches_function.ll new file mode 100644 index 0000000..6546968 --- /dev/null +++ b/llvm/test/Instrumentation/DataFlowSanitizer/reaches_function.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -passes=dfsan -dfsan-reaches-function-callbacks=1 -S | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +declare i32 @f() + +define void @load(i32) { + ; CHECK-LABEL: define void @load.dfsan + ; CHECK: call{{.*}}@__dfsan_reaches_function_callback + %i = alloca i32 + store i32 %0, ptr %i + ret void +} + +define void @store(ptr) { + ; CHECK-LABEL: define void @store.dfsan + ; CHECK: call{{.*}}@__dfsan_reaches_function_callback + %load = load i32, ptr %0 + ret void +} + +define void @call() { + ; CHECK-LABEL: define void @call.dfsan + ; CHECK: call{{.*}}@__dfsan_reaches_function_callback + %ret = call i32 @f() + ret void +} + +; CHECK-LABEL: @__dfsan_reaches_function_callback(i8, ptr, i32, ptr)