From d656ae28095726830f9beb8dbd4d69f5144ef821 Mon Sep 17 00:00:00 2001 From: Xiang1 Zhang Date: Fri, 9 Dec 2022 19:16:00 +0800 Subject: [PATCH] Enhance stack protector Reviewed By: LuoYuanke Differential Revision: https://reviews.llvm.org/D139254 --- llvm/lib/CodeGen/StackProtector.cpp | 69 +++++++-- llvm/test/CodeGen/X86/stack-protector-2.ll | 30 ++++ llvm/test/CodeGen/X86/stack-protector-no-return.ll | 165 +++++---------------- .../CodeGen/X86/stack-protector-recursively.ll | 26 ++++ 4 files changed, 152 insertions(+), 138 deletions(-) create mode 100644 llvm/test/CodeGen/X86/stack-protector-recursively.ll diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index f974bb6..9a1063e 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -415,11 +415,11 @@ static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M, /// /// Returns true if the platform/triple supports the stackprotectorcreate pseudo /// node. -static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, +static bool CreatePrologue(Function *F, Module *M, Instruction *CheckLoc, const TargetLoweringBase *TLI, AllocaInst *&AI) { bool SupportsSelectionDAGSP = false; IRBuilder<> B(&F->getEntryBlock().front()); - PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); + PointerType *PtrTy = Type::getInt8PtrTy(CheckLoc->getContext()); AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot"); Value *GuardSlot = getStackGuard(TLI, M, B, &SupportsSelectionDAGSP); @@ -442,16 +442,34 @@ bool StackProtector::InsertStackProtectors() { TLI->useStackGuardXorFP() || (EnableSelectionDAGSP && !TM->Options.EnableFastISel); AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. + bool RecalculateDT = false; + BasicBlock *FailBB = nullptr; for (BasicBlock &BB : llvm::make_early_inc_range(*F)) { - ReturnInst *RI = dyn_cast(BB.getTerminator()); - if (!RI) + // This is stack protector auto generated check BB, skip it. 
+    if (&BB == FailBB)
+      continue;
+    Instruction *CheckLoc = dyn_cast<ReturnInst>(BB.getTerminator());
+    if (!CheckLoc) {
+      for (auto &Inst : BB) {
+        auto *CB = dyn_cast<CallBase>(&Inst);
+        if (!CB)
+          continue;
+        if (!CB->doesNotReturn())
+          continue;
+        // Do stack check before no-return calls (e.g. __cxa_throw)
+        CheckLoc = CB;
+        break;
+      }
+    }
+
+    if (!CheckLoc)
       continue;
 
     // Generate prologue instrumentation if not already generated.
     if (!HasPrologue) {
       HasPrologue = true;
-      SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, AI);
+      SupportsSelectionDAGSP &= CreatePrologue(F, M, CheckLoc, TLI, AI);
     }
 
     // SelectionDAG based code generation. Nothing else needs to be done here.
@@ -477,8 +495,7 @@ bool StackProtector::InsertStackProtectors() {
     // verifier guarantees that a tail call is either directly before the
     // return or with a single correct bitcast of the return value in between so
     // we don't need to worry about many situations here.
-    Instruction *CheckLoc = RI;
-    Instruction *Prev = RI->getPrevNonDebugInstruction();
+    Instruction *Prev = CheckLoc->getPrevNonDebugInstruction();
     if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isTailCall())
       CheckLoc = Prev;
     else if (Prev) {
@@ -528,18 +545,13 @@
       // Create the FailBB. We duplicate the BB every time since the MI tail
       // merge pass will merge together all of the various BB into one including
       // fail BB generated by the stack protector pseudo instruction.
-      BasicBlock *FailBB = CreateFailBB();
+      if (!FailBB)
+        FailBB = CreateFailBB();
 
       // Split the basic block before the return instruction.
       BasicBlock *NewBB =
           BB.splitBasicBlock(CheckLoc->getIterator(), "SP_return");
 
-      // Update the dominator tree if we need to.
-      if (DT && DT->isReachableFromEntry(&BB)) {
-        DT->addNewBlock(NewBB, &BB);
-        DT->addNewBlock(FailBB, &BB);
-      }
-
       // Remove default branch instruction to the new BB.
       BB.getTerminator()->eraseFromParent();
@@ -560,9 +572,38 @@
             .createBranchWeights(SuccessProb.getNumerator(),
                                  FailureProb.getNumerator());
       B.CreateCondBr(Cmp, NewBB, FailBB, Weights);
+
+      // Update the dominator tree if we need to.
+      if (DT && DT->isReachableFromEntry(&BB))
+        RecalculateDT = true;
     }
   }
 
+  // TODO: Refine me, use a faster way to update the DT.
+  // Now we have split the BB, something like:
+  // ===================================
+  // BB:
+  //   RetOrNoReturnCall
+  // ==>
+  // BB:
+  //   CondBr
+  // NewBB:
+  //   RetOrNoReturnCall
+  // FailBB: (*)
+  //   HandleStackCheckFail
+  // ===================================
+  // The faster way should cover:
+  // For NewBB, it should inherit the old BB's dominatees.
+  // 1) return: it has no dominatees.
+  // 2) no-return call: there may be dominatees.
+  //
+  // For FailBB, it may have been created before, so
+  // 1) if it has 1 predecessor, add it into the DT.
+  // 2) if it has 2 predecessors, it has no dominator; remove it from the DT.
+  // 3) if it has 3 or more predecessors, the DT has already removed it, do nothing.
+  if (RecalculateDT)
+    DT->recalculate(*F);
+
   // Return if we didn't modify any basic blocks. i.e., there are no return
   // statements in the function.
return HasPrologue; diff --git a/llvm/test/CodeGen/X86/stack-protector-2.ll b/llvm/test/CodeGen/X86/stack-protector-2.ll index c6971a5..f2fc64a 100644 --- a/llvm/test/CodeGen/X86/stack-protector-2.ll +++ b/llvm/test/CodeGen/X86/stack-protector-2.ll @@ -192,4 +192,34 @@ define dso_local void @bar_nossp(i64 %0) { ret void } +; Check stack protect for noreturn call +define dso_local i32 @foo_no_return(i32 %0) #1 { +; CHECK-LABEL: @foo_no_return +entry: + %cmp = icmp sgt i32 %0, 4 + br i1 %cmp, label %if.then, label %if.end + +; CHECK: if.then: ; preds = %entry +; CHECK-NEXT: %StackGuard1 = load volatile i8*, i8* addrspace(257)* inttoptr (i32 40 to i8* addrspace(257)*), align 8 +; CHECK-NEXT: %1 = load volatile i8*, i8** %StackGuardSlot, align 8 +; CHECK-NEXT: %2 = icmp eq i8* %StackGuard1, %1 +; CHECK-NEXT: br i1 %2, label %SP_return, label %CallStackCheckFailBlk +; CHECK: SP_return: ; preds = %if.then +; CHECK-NEXT: %call = call i32 @foo_no_return(i32 1) +; CHECK-NEXT: br label %return +; CHECK: if.end: ; preds = %entry +; CHECK-NEXT: br label %return + +if.then: ; preds = %entry + %call = call i32 @foo_no_return(i32 1) + br label %return + +if.end: ; preds = %entry + br label %return + +return: ; preds = %if.end, %if.then + ret i32 0 +} + attributes #0 = { sspstrong } +attributes #1 = { noreturn sspreq} diff --git a/llvm/test/CodeGen/X86/stack-protector-no-return.ll b/llvm/test/CodeGen/X86/stack-protector-no-return.ll index b8ea5bd..8f391c1 100644 --- a/llvm/test/CodeGen/X86/stack-protector-no-return.ll +++ b/llvm/test/CodeGen/X86/stack-protector-no-return.ll @@ -1,146 +1,63 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu -o - -verify-dom-info | FileCheck %s -$__clang_call_terminate = comdat any - -@_ZTIi = external dso_local constant i8* -@.str = private unnamed_addr constant [5 x i8] c"win\0A\00", align 1 - -; Function Attrs: mustprogress noreturn sspreq uwtable -define dso_local void 
@_Z7catchesv() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %exception = tail call i8* @__cxa_allocate_exception(i64 4) #8 - %0 = bitcast i8* %exception to i32* - store i32 1, i32* %0, align 16 - invoke void @__cxa_throw(i8* nonnull %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #9 - to label %unreachable unwind label %lpad - - -lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } - catch i8* null - %2 = extractvalue { i8*, i32 } %1, 0 - %3 = tail call i8* @__cxa_begin_catch(i8* %2) #8 - %call = invoke i64 @write(i32 noundef 1, i8* noundef getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 noundef 4) - to label %invoke.cont unwind label %lpad1 - - -invoke.cont: ; preds = %lpad - invoke void @_exit(i32 noundef 1) #9 - to label %invoke.cont2 unwind label %lpad1 - -invoke.cont2: ; preds = %invoke.cont - unreachable - -lpad1: ; preds = %invoke.cont, %lpad - %4 = landingpad { i8*, i32 } - cleanup - invoke void @__cxa_end_catch() - to label %eh.resume unwind label %terminate.lpad - -eh.resume: ; preds = %lpad1 - resume { i8*, i32 } %4 - -terminate.lpad: ; preds = %lpad1 - %5 = landingpad { i8*, i32 } - catch i8* null - %6 = extractvalue { i8*, i32 } %5, 0 - tail call void @__clang_call_terminate(i8* %6) #10 - unreachable - - - -unreachable: ; preds = %entry - unreachable -} - -; Function Attrs: nofree -declare dso_local noalias i8* @__cxa_allocate_exception(i64) local_unnamed_addr #1 - -; Function Attrs: nofree noreturn -declare dso_local void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr #2 - -declare dso_local i32 @__gxx_personality_v0(...) 
- -; Function Attrs: nofree -declare dso_local i8* @__cxa_begin_catch(i8*) local_unnamed_addr #1 - -; Function Attrs: nofree -declare dso_local noundef i64 @write(i32 noundef, i8* nocapture noundef readonly, i64 noundef) local_unnamed_addr #3 - -; Function Attrs: nofree noreturn -declare dso_local void @_exit(i32 noundef) local_unnamed_addr #4 - -; Function Attrs: nofree -declare dso_local void @__cxa_end_catch() local_unnamed_addr #1 - -; Function Attrs: noinline noreturn nounwind -define linkonce_odr hidden void @__clang_call_terminate(i8* %0) local_unnamed_addr #5 comdat { -; CHECK-LABEL: __clang_call_terminate: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq __cxa_begin_catch -; CHECK-NEXT: callq _ZSt9terminatev - %2 = tail call i8* @__cxa_begin_catch(i8* %0) #8 - tail call void @_ZSt9terminatev() #10 - unreachable -} - -; Function Attrs: nofree noreturn nounwind -declare dso_local void @_ZSt9terminatev() local_unnamed_addr #6 - -; Function Attrs: mustprogress nofree sspreq uwtable -define dso_local void @_Z4vulni(i32 noundef %op) local_unnamed_addr #7 { -; CHECK-LABEL: _Z4vulni: +; Function Attrs: sspreq +define void @_Z7catchesv() #0 personality i8* null { +; CHECK-LABEL: _Z7catchesv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movq %fs:40, %rax ; CHECK-NEXT: movq %rax, (%rsp) -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: je .LBB2_3 -; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: callq *%rax +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: # %bb.1: # %invoke.cont +; CHECK-NEXT: movq %fs:40, %rax +; CHECK-NEXT: cmpq (%rsp), %rax +; CHECK-NEXT: jne .LBB0_6 +; CHECK-NEXT: # %bb.2: # %SP_return +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: callq *%rax +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: # %bb.3: # 
%invoke.cont2 +; CHECK-NEXT: .LBB0_4: # %lpad1 +; CHECK-NEXT: .Ltmp4: ; CHECK-NEXT: movq %fs:40, %rax ; CHECK-NEXT: cmpq (%rsp), %rax -; CHECK-NEXT: jne .LBB2_2 -; CHECK-NEXT: # %bb.4: # %SP_return +; CHECK-NEXT: jne .LBB0_6 +; CHECK-NEXT: # %bb.5: # %SP_return2 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB2_3: # %if.then +; CHECK-NEXT: .LBB0_6: # %CallStackCheckFailBlk ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movl $4, %edi -; CHECK-NEXT: callq __cxa_allocate_exception -; CHECK-NEXT: movl $1, (%rax) -; CHECK-NEXT: movl $_ZTIi, %esi -; CHECK-NEXT: movq %rax, %rdi -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: callq __cxa_throw -; CHECK-NEXT: .LBB2_2: # %CallStackCheckFailBlk ; CHECK-NEXT: callq __stack_chk_fail@PLT entry: - %cmp = icmp eq i32 %op, 1 - br i1 %cmp, label %if.then, label %if.end + %call = invoke i64 null(i32 0, i8* null, i64 0) + to label %invoke.cont unwind label %lpad1 -if.then: ; preds = %entry - %exception = tail call i8* @__cxa_allocate_exception(i64 4) #8 - %0 = bitcast i8* %exception to i32* - store i32 1, i32* %0, align 16 - tail call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #9 +invoke.cont: ; preds = %entry + invoke void null(i32 0) #1 + to label %invoke.cont2 unwind label %lpad1 + +invoke.cont2: ; preds = %invoke.cont unreachable -if.end: ; preds = %entry +lpad1: ; preds = %invoke.cont, %entry + %0 = landingpad { i8*, i32 } + cleanup ret void } -attributes #0 = { mustprogress noreturn sspreq uwtable } -attributes #1 = { nofree } -attributes #2 = { nofree noreturn } -attributes #3 = { nofree } -attributes #4 = { nofree noreturn } -attributes #5 = { noinline noreturn nounwind } -attributes #6 = { nofree noreturn nounwind } -attributes #7 = { mustprogress nofree sspreq uwtable } -attributes #8 = { nounwind } -attributes #9 = { noreturn } -attributes #10 = { noreturn nounwind } +; uselistorder directives +uselistorder i8* null, { 1, 0 } + 
+attributes #0 = { sspreq }
+attributes #1 = { noreturn }
diff --git a/llvm/test/CodeGen/X86/stack-protector-recursively.ll b/llvm/test/CodeGen/X86/stack-protector-recursively.ll
new file mode 100644
index 0000000..383af16
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stack-protector-recursively.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -o - < %s | FileCheck %s
+
+; Make sure the stack protector does not infinitely recurse into __stack_chk_fail.
+define dso_local void @__stack_chk_fail() local_unnamed_addr #0 {
+; CHECK-LABEL: __stack_chk_fail:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    movq %fs:40, %rax
+; CHECK-NEXT:    movq %rax, (%rsp)
+; CHECK-NEXT:    movq %fs:40, %rax
+; CHECK-NEXT:    cmpq (%rsp), %rax
+; CHECK-NEXT:    jne .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %SP_return
+; CHECK-NEXT:    ud2
+; CHECK-NEXT:  .LBB0_2: # %CallStackCheckFailBlk
+; CHECK-NEXT:    callq __stack_chk_fail
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
+declare void @llvm.trap() #1
+
+attributes #0 = { noreturn nounwind sspreq }
+attributes #1 = { noreturn nounwind }
-- 
2.7.4