From a2f6cecb6d18bcc59d6c702378ed0701cd0208ae Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 22 May 2012 17:19:09 +0000 Subject: [PATCH] add a new pass to instrument loads and stores for run-time bounds checking move EmitGEPOffset from InstCombine to Transforms/Utils/Local.h (a draft of this) patch reviewed by Andrew, thanks. llvm-svn: 157261 --- llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 1 + llvm/include/llvm/Transforms/Scalar.h | 8 + llvm/include/llvm/Transforms/Utils/Local.h | 61 +++++ llvm/lib/Transforms/InstCombine/InstCombine.h | 2 +- .../Transforms/InstCombine/InstCombineAddSub.cpp | 61 ----- .../InstCombine/InstructionCombining.cpp | 4 + llvm/lib/Transforms/Scalar/BoundsChecking.cpp | 280 +++++++++++++++++++++ llvm/lib/Transforms/Scalar/Scalar.cpp | 1 + 9 files changed, 357 insertions(+), 62 deletions(-) create mode 100644 llvm/lib/Transforms/Scalar/BoundsChecking.cpp diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 33d2043..df696b18 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -71,6 +71,7 @@ void initializeBasicCallGraphPass(PassRegistry&); void initializeBlockExtractorPassPass(PassRegistry&); void initializeBlockFrequencyInfoPass(PassRegistry&); void initializeBlockPlacementPass(PassRegistry&); +void initializeBoundsCheckingPass(PassRegistry&); void initializeBranchFolderPassPass(PassRegistry&); void initializeBranchProbabilityInfoPass(PassRegistry&); void initializeBreakCriticalEdgesPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 2258d45..697c94c 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -55,6 +55,7 @@ namespace { (void) llvm::createScalarEvolutionAliasAnalysisPass(); (void) llvm::createTypeBasedAliasAnalysisPass(); (void) llvm::createBlockPlacementPass(); + (void) llvm::createBoundsCheckingPass(); (void) llvm::createBreakCriticalEdgesPass(); (void) llvm::createCFGSimplificationPass(); (void) llvm::createConstantMergePass(); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 7f055d4..06130d1 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -327,6 +327,14 @@ Pass *createCorrelatedValuePropagationPass(); //===----------------------------------------------------------------------===// // +// BoundsChecking - This pass instruments the code to perform run-time bounds +// checking on loads, stores, and other memory intrinsics. +// Penalty is the maximum run-time that is acceptable for the user. +// +FunctionPass *createBoundsCheckingPass(unsigned Penalty = 5); + +//===----------------------------------------------------------------------===// +// // ObjCARCAPElim - ObjC ARC autorelease pool elimination. // Pass *createObjCARCAPElimPass(); diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index 7f99dbc..936d58e 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -15,6 +15,11 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H #define LLVM_TRANSFORMS_UTILS_LOCAL_H +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Operator.h" + namespace llvm { class User; @@ -160,6 +165,62 @@ static inline unsigned getKnownAlignment(Value *V, const TargetData *TD = 0) { return getOrEnforceKnownAlignment(V, 0, TD); } +/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the +/// code necessary to compute the offset from the base pointer (without adding +/// in the base pointer). Return the result as a signed integer of intptr size. +template +Value *EmitGEPOffset(IRBuilderTy *Builder, const TargetData &TD, User *GEP) { + gep_type_iterator GTI = gep_type_begin(GEP); + Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); + Value *Result = Constant::getNullValue(IntPtrTy); + + // If the GEP is inbounds, we know that none of the addressing operations will + // overflow in an unsigned sense. + bool isInBounds = cast(GEP)->isInBounds(); + + // Build a mask for high order bits. + unsigned IntPtrWidth = TD.getPointerSizeInBits(); + uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); + + for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; + ++i, ++GTI) { + Value *Op = *i; + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; + if (ConstantInt *OpC = dyn_cast(Op)) { + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast(*GTI)) { + Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + + if (Size) + Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".offs"); + continue; + } + + Constant *Scale = ConstantInt::get(IntPtrTy, Size); + Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); + Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/); + // Emit an add instruction. + Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); + continue; + } + // Convert to correct type. + if (Op->getType() != IntPtrTy) + Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); + if (Size != 1) { + // We'll let instcombine(mul) convert this to a shl if possible. + Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".idx", isInBounds /*NUW*/); + } + + // Emit an add instruction. + Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); + } + return Result; +} + ///===---------------------------------------------------------------------===// /// Dbg Intrinsic utilities /// diff --git a/llvm/lib/Transforms/InstCombine/InstCombine.h b/llvm/lib/Transforms/InstCombine/InstCombine.h index 41b2456..199df51 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombine.h +++ b/llvm/lib/Transforms/InstCombine/InstCombine.h @@ -226,7 +226,7 @@ private: bool DoXform = true); Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); - Value *EmitGEPOffset(User *GEP, bool NoNUW = false); + Value *EmitGEPOffset(User *GEP); public: // InsertNewInstBefore - insert an instruction New before instruction Old diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 8420a6a..c88ceed 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -420,67 +420,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { } -/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the -/// code necessary to compute the offset from the base pointer (without adding -/// in the base pointer). Return the result as a signed integer of intptr size. -/// If NoNUW is true, then the NUW flag is not used. -Value *InstCombiner::EmitGEPOffset(User *GEP, bool NoNUW) { - TargetData &TD = *getTargetData(); - gep_type_iterator GTI = gep_type_begin(GEP); - Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); - Value *Result = Constant::getNullValue(IntPtrTy); - - // If the GEP is inbounds, we know that none of the addressing operations will - // overflow in an unsigned sense. - bool isInBounds = cast(GEP)->isInBounds() && !NoNUW; - - // Build a mask for high order bits. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); - uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - - for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; - ++i, ++GTI) { - Value *Op = *i; - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; - if (ConstantInt *OpC = dyn_cast(Op)) { - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast(*GTI)) { - Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - - if (Size) - Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size), - GEP->getName()+".offs"); - continue; - } - - Constant *Scale = ConstantInt::get(IntPtrTy, Size); - Constant *OC = - ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/); - // Emit an add instruction. - Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); - continue; - } - // Convert to correct type. - if (Op->getType() != IntPtrTy) - Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); - if (Size != 1) { - // We'll let instcombine(mul) convert this to a shl if possible. - Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size), - GEP->getName()+".idx", isInBounds /*NUW*/); - } - - // Emit an add instruction. - Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); - } - return Result; -} - - - - /// Optimize pointer differences into the same array into a size. Consider: /// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index aa6e82f..b8a533b 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -87,6 +87,10 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { } +Value *InstCombiner::EmitGEPOffset(User *GEP) { + return llvm::EmitGEPOffset(Builder, *getTargetData(), GEP); +} + /// ShouldChangeType - Return true if it is desirable to convert a computation /// from 'From' to 'To'. We don't want to convert from a legal to an illegal /// type for example, or from a smaller to a larger illegal type. diff --git a/llvm/lib/Transforms/Scalar/BoundsChecking.cpp b/llvm/lib/Transforms/Scalar/BoundsChecking.cpp new file mode 100644 index 0000000..c92ae26 --- /dev/null +++ b/llvm/lib/Transforms/Scalar/BoundsChecking.cpp @@ -0,0 +1,280 @@ +//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass that instruments the code to perform run-time +// bounds checking on loads, stores, and other memory intrinsics. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "bounds-checking" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/TargetFolder.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Operator.h" +#include "llvm/Pass.h" +using namespace llvm; + +STATISTIC(ChecksAdded, "Bounds checks added"); +STATISTIC(ChecksSkipped, "Bounds checks skipped"); +STATISTIC(ChecksUnable, "Bounds checks unable to add"); + +typedef IRBuilder BuilderTy; + +namespace { + enum ConstTriState { + NotConst, Const, Dunno + }; + + struct BoundsChecking : public FunctionPass { + const TargetData *TD; + BuilderTy *Builder; + Function *Fn; + BasicBlock *TrapBB; + unsigned Penalty; + static char ID; + + BoundsChecking(unsigned _Penalty = 5) : FunctionPass(ID), Penalty(_Penalty){ + initializeBoundsCheckingPass(*PassRegistry::getPassRegistry()); + } + + BasicBlock *getTrapBB(); + ConstTriState computeAllocSize(Value *Alloc, uint64_t &Size, Value* &SizeValue); + bool instrument(Value *Ptr, Value *Val); + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + } + }; +} + +char BoundsChecking::ID = 0; +INITIALIZE_PASS(BoundsChecking, "bounds-checking", "Run-time bounds checking", + false, false) + + +/// getTrapBB - create a basic block that traps. All overflowing conditions +/// branch to this block. There's only one trap block per function. +BasicBlock *BoundsChecking::getTrapBB() { + if (TrapBB) + return TrapBB; + + BasicBlock::iterator PrevInsertPoint = Builder->GetInsertPoint(); + TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn); + Builder->SetInsertPoint(TrapBB); + + llvm::Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap); + CallInst *TrapCall = Builder->CreateCall(F); + TrapCall->setDoesNotReturn(); + TrapCall->setDoesNotThrow(); + Builder->CreateUnreachable(); + + Builder->SetInsertPoint(PrevInsertPoint); + return TrapBB; +} + + +/// computeAllocSize - compute the object size allocated by an allocation +/// site. Returns NotConst if the size is not constant (in SizeValue), Const if +/// the size is constant (in Size), and Dunno if the size could not be +/// determined within the given maximum Penalty that the computation would +/// incurr at run-time. +ConstTriState BoundsChecking::computeAllocSize(Value *Alloc, uint64_t &Size, + Value* &SizeValue) { + if (GlobalVariable *GV = dyn_cast(Alloc)) { + if (GV->hasDefinitiveInitializer()) { + Constant *C = GV->getInitializer(); + Size = TD->getTypeAllocSize(C->getType()); + return Const; + } + return Dunno; + + } else if (AllocaInst *AI = dyn_cast(Alloc)) { + if (!AI->getAllocatedType()->isSized()) + return Dunno; + + Size = TD->getTypeAllocSize(AI->getAllocatedType()); + if (!AI->isArrayAllocation()) + return Const; // we are done + + Value *ArraySize = AI->getArraySize(); + if (const ConstantInt *C = dyn_cast(ArraySize)) { + Size *= C->getZExtValue(); + return Const; + } + + if (Penalty < 2) + return Dunno; + + SizeValue = ConstantInt::get(ArraySize->getType(), Size); + SizeValue = Builder->CreateMul(SizeValue, ArraySize); + return NotConst; + + } else if (CallInst *MI = extractMallocCall(Alloc)) { + SizeValue = MI->getArgOperand(0); + if (ConstantInt *CI = dyn_cast(SizeValue)) { + Size = CI->getZExtValue(); + return Const; + } + return Penalty >= 2 ? NotConst : Dunno; + + } else if (CallInst *MI = extractCallocCall(Alloc)) { + Value *Arg1 = MI->getArgOperand(0); + Value *Arg2 = MI->getArgOperand(1); + if (ConstantInt *CI1 = dyn_cast(Arg1)) { + if (ConstantInt *CI2 = dyn_cast(Arg2)) { + Size = (CI1->getValue() * CI2->getValue()).getZExtValue(); + return Const; + } + } + + if (Penalty < 2) + return Dunno; + + SizeValue = Builder->CreateMul(Arg1, Arg2); + return NotConst; + } + + DEBUG(dbgs() << "computeAllocSize failed:\n" << *Alloc); + return Dunno; +} + + +bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { + uint64_t NeededSize = TD->getTypeStoreSize(InstVal->getType()); + DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize) + << " bytes\n"); + + Type *SizeTy = Type::getInt64Ty(Fn->getContext()); + + // Get to the real allocated thing and offset as fast as possible. + Ptr = Ptr->stripPointerCasts(); + GEPOperator *GEP; + + if ((GEP = dyn_cast(Ptr))) { + // check if we will be able to get the offset + if (!GEP->hasAllConstantIndices() && Penalty < 2) { + ++ChecksUnable; + return false; + } + Ptr = GEP->getPointerOperand()->stripPointerCasts(); + } + + uint64_t Size = 0; + Value *SizeValue = 0; + ConstTriState ConstAlloc = computeAllocSize(Ptr, Size, SizeValue); + if (ConstAlloc == Dunno) { + ++ChecksUnable; + return false; + } + assert(ConstAlloc == Const || SizeValue); + + uint64_t Offset = 0; + Value *OffsetValue = 0; + + if (GEP) { + if (GEP->hasAllConstantIndices()) { + SmallVector Ops(GEP->idx_begin(), GEP->idx_end()); + assert(GEP->getPointerOperandType()->isPointerTy()); + Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); + } else { + OffsetValue = EmitGEPOffset(Builder, *TD, GEP); + } + } + + if (!OffsetValue && ConstAlloc == Const) { + if (Size < Offset || (Size - Offset) < NeededSize) { + // Out of bounds + Builder->CreateBr(getTrapBB()); + ++ChecksAdded; + return true; + } + // in bounds + ++ChecksSkipped; + return false; + } + + if (OffsetValue) + OffsetValue = Builder->CreateZExt(OffsetValue, SizeTy); + else + OffsetValue = ConstantInt::get(SizeTy, Offset); + + if (SizeValue) + SizeValue = Builder->CreateZExt(SizeValue, SizeTy); + else + SizeValue = ConstantInt::get(SizeTy, Size); + + Value *NeededSizeVal = ConstantInt::get(SizeTy, NeededSize); + Value *ObjSize = Builder->CreateSub(SizeValue, OffsetValue); + Value *Cmp1 = Builder->CreateICmpULT(SizeValue, OffsetValue); + Value *Cmp2 = Builder->CreateICmpULT(ObjSize, NeededSizeVal); + Value *Or = Builder->CreateOr(Cmp1, Cmp2); + + // FIXME: add unlikely branch taken metadata? + Instruction *Inst = Builder->GetInsertPoint(); + BasicBlock *OldBB = Inst->getParent(); + BasicBlock *Cont = OldBB->splitBasicBlock(Inst); + OldBB->getTerminator()->eraseFromParent(); + BranchInst::Create(getTrapBB(), Cont, Or, OldBB); + ++ChecksAdded; + return true; +} + +bool BoundsChecking::runOnFunction(Function &F) { + TD = &getAnalysis(); + + TrapBB = 0; + Fn = &F; + BuilderTy TheBuilder(F.getContext(), TargetFolder(TD)); + Builder = &TheBuilder; + + // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory + // touching instructions + std::vector WorkList; + for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { + Instruction *I = &*i; + if (isa(I) || isa(I) || isa(I) || + isa(I)) + WorkList.push_back(I); + } + + bool MadeChange = false; + while (!WorkList.empty()) { + Instruction *I = WorkList.back(); + WorkList.pop_back(); + + Builder->SetInsertPoint(I); + if (LoadInst *LI = dyn_cast(I)) { + MadeChange |= instrument(LI->getPointerOperand(), LI); + } else if (StoreInst *SI = dyn_cast(I)) { + MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand()); + } else if (AtomicCmpXchgInst *AI = dyn_cast(I)) { + MadeChange |= instrument(AI->getPointerOperand(),AI->getCompareOperand()); + } else if (AtomicRMWInst *AI = dyn_cast(I)) { + MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand()); + } else { + llvm_unreachable("unknown Instruction type"); + } + } + return MadeChange; +} + +FunctionPass *llvm::createBoundsCheckingPass(unsigned Penalty) { + return new BoundsChecking(Penalty); +} diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 7d65bcc..49970d4 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -29,6 +29,7 @@ using namespace llvm; void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeADCEPass(Registry); initializeBlockPlacementPass(Registry); + initializeBoundsCheckingPass(Registry); initializeCodeGenPreparePass(Registry); initializeConstantPropagationPass(Registry); initializeCorrelatedValuePropagationPass(Registry); -- 2.7.4