From ec277b67eb36332c2f3c48e1967b2540af681716 Mon Sep 17 00:00:00 2001 From: Kevin Athey Date: Thu, 11 Aug 2022 00:23:15 -0700 Subject: [PATCH] [MSAN] Separate id ptr from constant string for variable names used in track origins. The goal is to reduce the size of the MSAN with track origins binary, by making the variable name locations constant which will allow the linker to compress them. Follows: https://reviews.llvm.org/D131415 Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D131631 --- compiler-rt/lib/msan/msan.cpp | 27 ++++++++++------ compiler-rt/lib/msan/msan_interface_internal.h | 3 ++ .../Transforms/Instrumentation/MemorySanitizer.cpp | 37 +++++++++++----------- .../test/Instrumentation/MemorySanitizer/alloca.ll | 31 +++++++++--------- 4 files changed, 56 insertions(+), 42 deletions(-) diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index 3bc48c0..6f92d85 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -303,19 +303,21 @@ u32 ChainOrigin(u32 id, StackTrace *stack) { return chained.raw_id(); } -// 'descr' is created at compile time and contains '----' in the beginning. -// When we see descr for the first time we replace '----' with a uniq id -// and set the origin to (id | (31-th bit)). -static inline void SetAllocaOrigin(void *a, uptr size, char *descr, uptr pc) { +// Current implementation separates the 'id_ptr' from the 'descr' and makes +// 'descr' constant. +// Previous implementation 'descr' is created at compile time and contains +// '----' in the beginning. When we see descr for the first time we replace +// '----' with a uniq id and set the origin to (id | (31-th bit)). +static inline void SetAllocaOrigin(void *a, uptr size, u32 *id_ptr, char *descr, + uptr pc) { static const u32 dash = '-'; static const u32 first_timer = dash + (dash << 8) + (dash << 16) + (dash << 24); - u32 *id_ptr = (u32 *)descr; u32 id = *id_ptr; - if (id == first_timer) { + if (id == 0 || id == first_timer) { u32 idx = atomic_fetch_add(&NumStackOriginDescrs, 1, memory_order_relaxed); CHECK_LT(idx, kNumStackOriginDescrs); - StackOriginDescr[idx] = descr + 4; + StackOriginDescr[idx] = descr; StackOriginPC[idx] = pc; id = Origin::CreateStackOrigin(idx).raw_id(); *id_ptr = id; @@ -602,14 +604,21 @@ void __msan_set_origin(const void *a, uptr size, u32 origin) { } void __msan_set_alloca_origin(void *a, uptr size, char *descr) { - SetAllocaOrigin(a, size, descr, GET_CALLER_PC()); + SetAllocaOrigin(a, size, reinterpret_cast(descr), descr + 4, + GET_CALLER_PC()); } void __msan_set_alloca_origin4(void *a, uptr size, char *descr, uptr pc) { // Intentionally ignore pc and use return address. This function is here for // compatibility, in case program is linked with library instrumented by // older clang. - SetAllocaOrigin(a, size, descr, GET_CALLER_PC()); + SetAllocaOrigin(a, size, reinterpret_cast(descr), descr + 4, + GET_CALLER_PC()); +} + +void __msan_set_alloca_origin_with_descr(void *a, uptr size, u32 *id_ptr, + char *descr) { + SetAllocaOrigin(a, size, id_ptr, descr, GET_CALLER_PC()); } u32 __msan_chain_origin(u32 id) { diff --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h index c72c91c..009d5b4 100644 --- a/compiler-rt/lib/msan/msan_interface_internal.h +++ b/compiler-rt/lib/msan/msan_interface_internal.h @@ -109,6 +109,9 @@ void __msan_set_alloca_origin(void *a, uptr size, char *descr); SANITIZER_INTERFACE_ATTRIBUTE void __msan_set_alloca_origin4(void *a, uptr size, char *descr, uptr pc); SANITIZER_INTERFACE_ATTRIBUTE +void __msan_set_alloca_origin_with_descr(void *a, uptr size, u32 *id_ptr, + char *descr); +SANITIZER_INTERFACE_ATTRIBUTE u32 __msan_chain_origin(u32 id); SANITIZER_INTERFACE_ATTRIBUTE u32 __msan_get_origin(const void *a); diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index a3c6059..bb22608 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -579,7 +579,7 @@ private: /// Run-time helper that generates a new origin value for a stack /// allocation. - FunctionCallee MsanSetAllocaOriginFn; + FunctionCallee MsanSetAllocaOriginWithDescriptionFn; /// Run-time helper that poisons stack on function entry. FunctionCallee MsanPoisonStackFn; @@ -691,10 +691,10 @@ void MemorySanitizerPass::printPipeline( /// Creates a writable global for Str so that we can pass it to the /// run-time lib. Runtime uses first 4 bytes of the string to store the /// frame ID, so the string needs to be mutable. -static GlobalVariable *createPrivateNonConstGlobalForString(Module &M, - StringRef Str) { +static GlobalVariable *createPrivateConstGlobalForString(Module &M, + StringRef Str) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); - return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false, + return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/true, GlobalValue::PrivateLinkage, StrConst, ""); } @@ -825,9 +825,9 @@ void MemorySanitizer::createUserspaceApi(Module &M) { IRB.getInt32Ty()); } - MsanSetAllocaOriginFn = M.getOrInsertFunction( - "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, - IRB.getInt8PtrTy()); + MsanSetAllocaOriginWithDescriptionFn = M.getOrInsertFunction( + "__msan_set_alloca_origin_with_descr", IRB.getVoidTy(), + IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy(), IRB.getInt8PtrTy()); MsanPoisonStackFn = M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy); @@ -3877,17 +3877,16 @@ struct MemorySanitizerVisitor : public InstVisitor { "_msphi_o")); } + Value *getLocalVarIdptr(AllocaInst &I) { + ConstantInt *IntConst = + ConstantInt::get(Type::getInt32Ty((*F.getParent()).getContext()), 0); + return new GlobalVariable(*F.getParent(), IntConst->getType(), + /*isConstant=*/false, GlobalValue::PrivateLinkage, + IntConst); + } + Value *getLocalVarDescription(AllocaInst &I) { - SmallString<2048> StackDescriptionStorage; - raw_svector_ostream StackDescription(StackDescriptionStorage); - // We create a string with a description of the stack allocation and - // pass it into __msan_set_alloca_origin. - // It will be printed by the run-time if stack-originated UMR is found. - // The first 4 bytes of the string are set to '----' and will be replaced - // by __msan_va_arg_overflow_size_tls at the first call. - StackDescription << "----" << I.getName(); - return createPrivateNonConstGlobalForString(*F.getParent(), - StackDescription.str()); + return createPrivateConstGlobalForString(*F.getParent(), I.getName()); } void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) { @@ -3904,9 +3903,11 @@ struct MemorySanitizerVisitor : public InstVisitor { } if (PoisonStack && MS.TrackOrigins) { + Value *Idptr = getLocalVarIdptr(I); Value *Descr = getLocalVarDescription(I); - IRB.CreateCall(MS.MsanSetAllocaOriginFn, + IRB.CreateCall(MS.MsanSetAllocaOriginWithDescriptionFn, {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len, + IRB.CreatePointerCast(Idptr, IRB.getInt8PtrTy()), IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())}); } } diff --git a/llvm/test/Instrumentation/MemorySanitizer/alloca.ll b/llvm/test/Instrumentation/MemorySanitizer/alloca.ll index 2fc50ac..13fed18 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/alloca.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/alloca.ll @@ -14,7 +14,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; ORIGIN: [[D:@[0-9]+]] = private global [13 x i8] c"----unique_x\00" +; ORIGIN: [[IDPTR:@[0-9]+]] = private global i32 0 +; ORIGIN: [[DESCR:@[0-9]+]] = private constant [9 x i8] c"unique_x\00" define void @static() sanitize_memory { entry: @@ -25,7 +26,7 @@ entry: ; CHECK-LABEL: define void @static( ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false) ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4) -; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4, i8* {{.*}} [[D]], +; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4, i8* {{.*}} [[IDPTR]] {{.*}}, i8* {{.*}} [[DESCR]], ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4, ; CHECK: ret void @@ -41,7 +42,7 @@ l: ; CHECK-LABEL: define void @dynamic( ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false) ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4) -; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4, +; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4, ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4, ; CHECK: ret void @@ -54,7 +55,7 @@ entry: ; CHECK-LABEL: define void @array( ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 20, i1 false) ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 20) -; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 20, +; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 20, ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 20, ; CHECK: ret void @@ -68,7 +69,7 @@ entry: ; CHECK: %[[A:.*]] = mul i64 4, %cnt ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 %[[A]], i1 false) ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 %[[A]]) -; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 %[[A]], +; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 %[[A]], ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 %[[A]], ; CHECK: ret void @@ -82,7 +83,7 @@ entry: ; CHECK-LABEL: define void @unpoison_local( ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 0, i64 20, i1 false) ; CALL: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 0, i64 20, i1 false) -; ORIGIN-NOT: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 20, +; ORIGIN-NOT: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 20, ; KMSAN: call void @__msan_unpoison_alloca(i8* {{.*}}, i64 20) ; CHECK: ret void @@ -111,13 +112,13 @@ another_bb: ; CHECK: call void @llvm.lifetime.start ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false) ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4) -; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4, +; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4, ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4, ; CHECK: call void @llvm.lifetime.start ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false) ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4) -; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4, +; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4, ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4, ; CHECK: ret void @@ -138,7 +139,7 @@ entry: ; CHECK: %[[A:.*]] = mul i64 4, %cnt ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 %[[A]], i1 false) ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 %[[A]]) -; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 %[[A]], +; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 %[[A]], ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 %[[A]], ; CHECK: call void @llvm.lifetime.end ; CHECK: ret void @@ -178,36 +179,36 @@ another_bb: ; CHECK: %x = alloca i32 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false) ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4) -; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4, +; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4, ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4, ; CHECK: %y = alloca i32 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false) ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4) -; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4, +; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4, ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4, ; CHECK: %z = alloca i32 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false) ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4) -; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4, +; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4, ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4, ; There're two lifetime intrinsics for %z, but we must instrument it only once. ; INLINE-NOT: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false) ; CALL-NOT: call void @__msan_poison_stack(i8* {{.*}}, i64 4) -; ORIGIN-NOT: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4, +; ORIGIN-NOT: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4, ; KMSAN-NOT: call void @__msan_poison_alloca(i8* {{.*}}, i64 4, ; CHECK-LABEL: another_bb: ; CHECK: call void @llvm.lifetime.start ; INLINE-NOT: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false) ; CALL-NOT: call void @__msan_poison_stack(i8* {{.*}}, i64 4) -; ORIGIN-NOT: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4, +; ORIGIN-NOT: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4, ; KMSAN-NOT: call void @__msan_poison_alloca(i8* {{.*}}, i64 4, ; CHECK: call void @llvm.lifetime.end ; CHECK: call void @llvm.lifetime.start ; INLINE-NOT: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false) ; CALL-NOT: call void @__msan_poison_stack(i8* {{.*}}, i64 4) -; ORIGIN-NOT: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4, +; ORIGIN-NOT: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4, ; KMSAN-NOT: call void @__msan_poison_alloca(i8* {{.*}}, i64 4, ; CHECK: call void @llvm.lifetime.end -- 2.7.4