From 7e274e02ae088923e67cd13b99d52644532ad1cc Mon Sep 17 00:00:00 2001
From: Davide Italiano
Date: Thu, 22 Dec 2016 16:03:48 +0000
Subject: [PATCH] [GVN] Initial check-in of a new global value numbering algorithm.

The code has been developed by Daniel Berlin over the years, and the goal of
the new implementation is to address shortcomings of the current GVN
infrastructure: long compile times on large testcases, lack of phi
predication, no load/store value numbering, etc.

The current code implements only the "core" GVN algorithm; other pieces (load
coercion, phi handling, the predicate system) are already implemented in an
out-of-tree branch. Once the core is stable, we'll start adding those pieces
on top of the base framework.

The tests currently living in test/Transforms/NewGVN are a copy of the ones in
GVN, with the appropriate `XFAIL` annotations for features still missing in
NewGVN. A flag will be added in a future commit to enable NewGVN, so that
interested parties can exercise this code easily.

Differential Revision: https://reviews.llvm.org/D26224

llvm-svn: 290346
---
 llvm/include/llvm-c/Transforms/Scalar.h | 3 +
 llvm/include/llvm/InitializePasses.h | 1 +
 llvm/include/llvm/LinkAllPasses.h | 1 +
 llvm/include/llvm/Transforms/Scalar.h | 7 +
 .../include/llvm/Transforms/Scalar/GVNExpression.h | 551 ++++++
 llvm/include/llvm/Transforms/Scalar/NewGVN.h | 28 +
 llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 +
 llvm/lib/Transforms/Scalar/NewGVN.cpp | 1853 ++++++++++++++++++++
 llvm/lib/Transforms/Scalar/Scalar.cpp | 5 +
 .../Transforms/NewGVN/2007-07-25-DominatedLoop.ll | 86 +
 .../Transforms/NewGVN/2007-07-25-InfiniteLoop.ll | 15 +
 llvm/test/Transforms/NewGVN/2007-07-25-Loop.ll | 15 +
 .../Transforms/NewGVN/2007-07-25-NestedLoop.ll | 38 +
 .../NewGVN/2007-07-25-SinglePredecessor.ll | 29 +
 .../NewGVN/2007-07-26-InterlockingLoops.ll | 40 +
 .../Transforms/NewGVN/2007-07-26-NonRedundant.ll | 16 +
 .../Transforms/NewGVN/2007-07-26-PhiErasure.ll | 45 +
 llvm/test/Transforms/NewGVN/2007-07-30-PredIDom.ll | 274 +++
 .../Transforms/NewGVN/2007-07-31-NoDomInherit.ll | 315 ++++
 .../Transforms/NewGVN/2007-07-31-RedundantPhi.ll | 23 +
 .../test/Transforms/NewGVN/2008-02-12-UndefLoad.ll | 22 +
 llvm/test/Transforms/NewGVN/2008-02-13-NewPHI.ll | 22 +
 .../Transforms/NewGVN/2008-07-02-Unreachable.ll | 36 +
 .../Transforms/NewGVN/2008-12-09-SelfRemove.ll | 38 +
 .../test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll | 35 +
 .../Transforms/NewGVN/2008-12-14-rle-reanalyze.ll | 18 +
 .../Transforms/NewGVN/2008-12-15-CacheVisited.ll | 28 +
 .../NewGVN/2009-01-21-SortInvalidation.ll | 55 +
 .../NewGVN/2009-01-22-SortInvalidation.ll | 100 ++
 .../test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll | 110 ++
 .../Transforms/NewGVN/2009-07-13-MemDepSortFail.ll | 67 +
 .../NewGVN/2009-11-12-MemDepMallocBitCast.ll | 15 +
 .../Transforms/NewGVN/2010-03-31-RedundantPHIs.ll | 42 +
 llvm/test/Transforms/NewGVN/2010-05-08-OneBit.ll | 67 +
 llvm/test/Transforms/NewGVN/2010-11-13-Simplify.ll | 15 +
 .../Transforms/NewGVN/2011-04-27-phioperands.ll | 106 ++
 .../NewGVN/2011-07-07-MatchIntrinsicExtract.ll | 86 +
 .../test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll | 81 +
 llvm/test/Transforms/NewGVN/2012-05-22-PreCrash.ll | 33 +
 .../NewGVN/2016-08-30-MaskedScatterGather.ll | 43 +
 llvm/test/Transforms/NewGVN/MemdepMiscompile.ll | 54 +
 llvm/test/Transforms/NewGVN/assume-equal.ll | 276 +++
 llvm/test/Transforms/NewGVN/basic-undef-test.ll | 15 +
 llvm/test/Transforms/NewGVN/basic.ll | 17 +
 llvm/test/Transforms/NewGVN/big-endian.ll | 40 +
 llvm/test/Transforms/NewGVN/bitcast-of-call.ll | 14 +
llvm/test/Transforms/NewGVN/br-identical.ll | 38 + llvm/test/Transforms/NewGVN/calloc-load-removal.ll | 26 + llvm/test/Transforms/NewGVN/calls-nonlocal.ll | 76 + llvm/test/Transforms/NewGVN/calls-readonly.ll | 45 + llvm/test/Transforms/NewGVN/commute.ll | 23 + llvm/test/Transforms/NewGVN/cond_br.ll | 55 + llvm/test/Transforms/NewGVN/cond_br2.ll | 141 ++ llvm/test/Transforms/NewGVN/condprop.ll | 300 ++++ llvm/test/Transforms/NewGVN/crash-no-aa.ll | 15 + llvm/test/Transforms/NewGVN/crash.ll | 201 +++ llvm/test/Transforms/NewGVN/dbg-redundant-load.ll | 52 + llvm/test/Transforms/NewGVN/edge.ll | 171 ++ llvm/test/Transforms/NewGVN/fence.ll | 70 + llvm/test/Transforms/NewGVN/flags.ll | 19 + llvm/test/Transforms/NewGVN/fold-const-expr.ll | 100 ++ llvm/test/Transforms/NewGVN/fpmath.ll | 45 + llvm/test/Transforms/NewGVN/funclet.ll | 44 + llvm/test/Transforms/NewGVN/invariant.group.ll | 338 ++++ llvm/test/Transforms/NewGVN/invariant.start.ll | 59 + llvm/test/Transforms/NewGVN/lifetime-simple.ll | 20 + llvm/test/Transforms/NewGVN/load-constant-mem.ll | 19 + .../NewGVN/load-from-unreachable-predecessor.ll | 20 + llvm/test/Transforms/NewGVN/malloc-load-removal.ll | 57 + .../NewGVN/no_speculative_loads_with_asan.ll | 55 + llvm/test/Transforms/NewGVN/noalias.ll | 43 + llvm/test/Transforms/NewGVN/non-local-offset.ll | 59 + llvm/test/Transforms/NewGVN/nonescaping-malloc.ll | 112 ++ .../test/Transforms/NewGVN/null-aliases-nothing.ll | 20 + llvm/test/Transforms/NewGVN/opt-remarks.ll | 109 ++ .../NewGVN/phi-translate-partial-alias.ll | 27 + llvm/test/Transforms/NewGVN/pr10820.ll | 19 + llvm/test/Transforms/NewGVN/pr12979.ll | 93 + llvm/test/Transforms/NewGVN/pr14166.ll | 25 + llvm/test/Transforms/NewGVN/pr17732.ll | 31 + llvm/test/Transforms/NewGVN/pr17852.ll | 66 + llvm/test/Transforms/NewGVN/pr24397.ll | 18 + llvm/test/Transforms/NewGVN/pr24426.ll | 18 + llvm/test/Transforms/NewGVN/pr25440.ll | 108 ++ llvm/test/Transforms/NewGVN/pr28562.ll | 9 + llvm/test/Transforms/NewGVN/pre-compare.ll | 68 + llvm/test/Transforms/NewGVN/pre-new-inst.ll | 30 + llvm/test/Transforms/NewGVN/propagate-ir-flags.ll | 29 + llvm/test/Transforms/NewGVN/range.ll | 101 ++ llvm/test/Transforms/NewGVN/readattrs.ll | 18 + llvm/test/Transforms/NewGVN/rle-must-alias.ll | 48 + .../test/Transforms/NewGVN/rle-no-phi-translate.ll | 28 + llvm/test/Transforms/NewGVN/rle-nonlocal.ll | 26 + llvm/test/Transforms/NewGVN/stale-loop-info.ll | 50 + llvm/test/Transforms/NewGVN/tbaa.ll | 130 ++ .../NewGVN/unreachable_block_infinite_loop.ll | 43 + .../test/Transforms/NewGVN/volatile-nonvolatile.ll | 62 + 97 files changed, 8160 insertions(+) create mode 100644 llvm/include/llvm/Transforms/Scalar/GVNExpression.h create mode 100644 llvm/include/llvm/Transforms/Scalar/NewGVN.h create mode 100644 llvm/lib/Transforms/Scalar/NewGVN.cpp create mode 100644 llvm/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll create mode 100644 llvm/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll create mode 100644 llvm/test/Transforms/NewGVN/2007-07-25-Loop.ll create mode 100644 llvm/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll create mode 100644 llvm/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll create mode 100644 llvm/test/Transforms/NewGVN/2007-07-26-InterlockingLoops.ll create mode 100644 llvm/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll create mode 100644 llvm/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll create mode 100644 llvm/test/Transforms/NewGVN/2007-07-30-PredIDom.ll create mode 100644 llvm/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll 
create mode 100644 llvm/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll create mode 100644 llvm/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll create mode 100644 llvm/test/Transforms/NewGVN/2008-02-13-NewPHI.ll create mode 100644 llvm/test/Transforms/NewGVN/2008-07-02-Unreachable.ll create mode 100644 llvm/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll create mode 100644 llvm/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll create mode 100644 llvm/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll create mode 100644 llvm/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll create mode 100644 llvm/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll create mode 100644 llvm/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll create mode 100644 llvm/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll create mode 100644 llvm/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll create mode 100644 llvm/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll create mode 100644 llvm/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll create mode 100644 llvm/test/Transforms/NewGVN/2010-05-08-OneBit.ll create mode 100644 llvm/test/Transforms/NewGVN/2010-11-13-Simplify.ll create mode 100644 llvm/test/Transforms/NewGVN/2011-04-27-phioperands.ll create mode 100644 llvm/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll create mode 100644 llvm/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll create mode 100644 llvm/test/Transforms/NewGVN/2012-05-22-PreCrash.ll create mode 100644 llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll create mode 100644 llvm/test/Transforms/NewGVN/MemdepMiscompile.ll create mode 100644 llvm/test/Transforms/NewGVN/assume-equal.ll create mode 100644 llvm/test/Transforms/NewGVN/basic-undef-test.ll create mode 100644 llvm/test/Transforms/NewGVN/basic.ll create mode 100644 llvm/test/Transforms/NewGVN/big-endian.ll create mode 100644 llvm/test/Transforms/NewGVN/bitcast-of-call.ll create mode 100644 llvm/test/Transforms/NewGVN/br-identical.ll create mode 100644 llvm/test/Transforms/NewGVN/calloc-load-removal.ll create mode 100644 llvm/test/Transforms/NewGVN/calls-nonlocal.ll create mode 100644 llvm/test/Transforms/NewGVN/calls-readonly.ll create mode 100644 llvm/test/Transforms/NewGVN/commute.ll create mode 100644 llvm/test/Transforms/NewGVN/cond_br.ll create mode 100644 llvm/test/Transforms/NewGVN/cond_br2.ll create mode 100644 llvm/test/Transforms/NewGVN/condprop.ll create mode 100644 llvm/test/Transforms/NewGVN/crash-no-aa.ll create mode 100644 llvm/test/Transforms/NewGVN/crash.ll create mode 100644 llvm/test/Transforms/NewGVN/dbg-redundant-load.ll create mode 100644 llvm/test/Transforms/NewGVN/edge.ll create mode 100644 llvm/test/Transforms/NewGVN/fence.ll create mode 100644 llvm/test/Transforms/NewGVN/flags.ll create mode 100644 llvm/test/Transforms/NewGVN/fold-const-expr.ll create mode 100644 llvm/test/Transforms/NewGVN/fpmath.ll create mode 100644 llvm/test/Transforms/NewGVN/funclet.ll create mode 100644 llvm/test/Transforms/NewGVN/invariant.group.ll create mode 100644 llvm/test/Transforms/NewGVN/invariant.start.ll create mode 100644 llvm/test/Transforms/NewGVN/lifetime-simple.ll create mode 100644 llvm/test/Transforms/NewGVN/load-constant-mem.ll create mode 100644 llvm/test/Transforms/NewGVN/load-from-unreachable-predecessor.ll create mode 100644 llvm/test/Transforms/NewGVN/malloc-load-removal.ll create mode 100644 llvm/test/Transforms/NewGVN/no_speculative_loads_with_asan.ll create mode 100644 llvm/test/Transforms/NewGVN/noalias.ll create mode 100644 
llvm/test/Transforms/NewGVN/non-local-offset.ll create mode 100644 llvm/test/Transforms/NewGVN/nonescaping-malloc.ll create mode 100644 llvm/test/Transforms/NewGVN/null-aliases-nothing.ll create mode 100644 llvm/test/Transforms/NewGVN/opt-remarks.ll create mode 100644 llvm/test/Transforms/NewGVN/phi-translate-partial-alias.ll create mode 100644 llvm/test/Transforms/NewGVN/pr10820.ll create mode 100644 llvm/test/Transforms/NewGVN/pr12979.ll create mode 100644 llvm/test/Transforms/NewGVN/pr14166.ll create mode 100644 llvm/test/Transforms/NewGVN/pr17732.ll create mode 100644 llvm/test/Transforms/NewGVN/pr17852.ll create mode 100644 llvm/test/Transforms/NewGVN/pr24397.ll create mode 100644 llvm/test/Transforms/NewGVN/pr24426.ll create mode 100644 llvm/test/Transforms/NewGVN/pr25440.ll create mode 100644 llvm/test/Transforms/NewGVN/pr28562.ll create mode 100644 llvm/test/Transforms/NewGVN/pre-compare.ll create mode 100644 llvm/test/Transforms/NewGVN/pre-new-inst.ll create mode 100644 llvm/test/Transforms/NewGVN/propagate-ir-flags.ll create mode 100644 llvm/test/Transforms/NewGVN/range.ll create mode 100644 llvm/test/Transforms/NewGVN/readattrs.ll create mode 100644 llvm/test/Transforms/NewGVN/rle-must-alias.ll create mode 100644 llvm/test/Transforms/NewGVN/rle-no-phi-translate.ll create mode 100644 llvm/test/Transforms/NewGVN/rle-nonlocal.ll create mode 100644 llvm/test/Transforms/NewGVN/stale-loop-info.ll create mode 100644 llvm/test/Transforms/NewGVN/tbaa.ll create mode 100644 llvm/test/Transforms/NewGVN/unreachable_block_infinite_loop.ll create mode 100644 llvm/test/Transforms/NewGVN/volatile-nonvolatile.ll diff --git a/llvm/include/llvm-c/Transforms/Scalar.h b/llvm/include/llvm-c/Transforms/Scalar.h index e45a780..8991e09 100644 --- a/llvm/include/llvm-c/Transforms/Scalar.h +++ b/llvm/include/llvm-c/Transforms/Scalar.h @@ -56,6 +56,9 @@ void LLVMAddMergedLoadStoreMotionPass(LLVMPassManagerRef PM); /** See llvm::createGVNPass function. */ void LLVMAddGVNPass(LLVMPassManagerRef PM); +/** See llvm::createGVNPass function. */ +void LLVMAddNewGVNPass(LLVMPassManagerRef PM); + /** See llvm::createIndVarSimplifyPass function. 
*/ void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index acd99ef..45c6fc5 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -252,6 +252,7 @@ void initializeModuleDebugInfoPrinterPass(PassRegistry&); void initializeModuleSummaryIndexWrapperPassPass(PassRegistry &); void initializeNameAnonGlobalLegacyPassPass(PassRegistry &); void initializeNaryReassociateLegacyPassPass(PassRegistry &); +void initializeNewGVNPass(PassRegistry&); void initializeNoAAPass(PassRegistry&); void initializeObjCARCAAWrapperPassPass(PassRegistry&); void initializeObjCARCAPElimPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index a6bc9cf..e50137f 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -167,6 +167,7 @@ namespace { (void) llvm::createGVNHoistPass(); (void) llvm::createMergedLoadStoreMotionPass(); (void) llvm::createGVNPass(); + (void) llvm::createNewGVNPass(); (void) llvm::createMemCpyOptPass(); (void) llvm::createLoopDeletionPass(); (void) llvm::createPostDomTree(); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 1c834e9..92558937 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -348,6 +348,13 @@ FunctionPass *createMergedLoadStoreMotionPass(); //===----------------------------------------------------------------------===// // +// GVN - This pass performs global value numbering and redundant load +// elimination cotemporaneously. +// +FunctionPass *createNewGVNPass(); + +//===----------------------------------------------------------------------===// +// // MemCpyOpt - This pass performs optimizations related to eliminating memcpy // calls and/or combining multiple stores into memset's. // diff --git a/llvm/include/llvm/Transforms/Scalar/GVNExpression.h b/llvm/include/llvm/Transforms/Scalar/GVNExpression.h new file mode 100644 index 0000000..db67db8 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/GVNExpression.h @@ -0,0 +1,551 @@ +//======- GVNExpression.h - GVN Expression classes -------*- C++ -*-==-------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The header file for the GVN pass that contains expression handling +/// classes +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_GVNEXPRESSION_H +#define LLVM_TRANSFORMS_SCALAR_GVNEXPRESSION_H + +#include "llvm/ADT/Hashing.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ArrayRecycler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { +class MemoryAccess; + +namespace GVNExpression { + +enum ExpressionType { + ET_Base, + ET_Constant, + ET_Variable, + ET_BasicStart, + ET_Basic, + ET_Call, + ET_AggregateValue, + ET_Phi, + ET_Load, + ET_Store, + ET_BasicEnd +}; + +class Expression { +private: + ExpressionType EType; + unsigned Opcode; + +public: + Expression(const Expression &) = delete; + Expression(ExpressionType ET = ET_Base, unsigned O = ~2U) + : EType(ET), Opcode(O) {} + void operator=(const Expression &) = delete; + virtual ~Expression(); + + static unsigned getEmptyKey() { return ~0U; } + static unsigned getTombstoneKey() { return ~1U; } + + bool operator==(const Expression &Other) const { + if (getOpcode() != Other.getOpcode()) + return false; + if (getOpcode() == getEmptyKey() || getOpcode() == getTombstoneKey()) + return true; + // Compare the expression type for anything but load and store. + // For load and store we set the opcode to zero. + // This is needed for load coercion. + if (getExpressionType() != ET_Load && + getExpressionType() != ET_Store && + getExpressionType() != Other.getExpressionType()) + return false; + + return equals(Other); + } + + virtual bool equals(const Expression &Other) const { return true; } + + unsigned getOpcode() const { return Opcode; } + void setOpcode(unsigned opcode) { Opcode = opcode; } + ExpressionType getExpressionType() const { return EType; } + + virtual hash_code getHashValue() const { + return hash_combine(getExpressionType(), getOpcode()); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const { + if (PrintEType) + OS << "etype = " << getExpressionType() << ","; + OS << "opcode = " << getOpcode() << ", "; + } + + void print(raw_ostream &OS) const { + OS << "{ "; + printInternal(OS, true); + OS << "}"; + } + void dump() const { print(dbgs()); } +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const Expression &E) { + E.print(OS); + return OS; +} + +class BasicExpression : public Expression { +private: + typedef ArrayRecycler RecyclerType; + typedef RecyclerType::Capacity RecyclerCapacity; + Value **Operands; + unsigned MaxOperands; + unsigned NumOperands; + Type *ValueType; + +public: + static bool classof(const Expression *EB) { + ExpressionType ET = EB->getExpressionType(); + return ET > ET_BasicStart && ET < ET_BasicEnd; + } + + BasicExpression(unsigned NumOperands) + : BasicExpression(NumOperands, ET_Basic) {} + BasicExpression(unsigned NumOperands, ExpressionType ET) + : Expression(ET), Operands(nullptr), MaxOperands(NumOperands), + NumOperands(0), ValueType(nullptr) {} + virtual ~BasicExpression() override; + void operator=(const BasicExpression &) = delete; + BasicExpression(const BasicExpression &) = delete; + BasicExpression() = delete; + + /// \brief Swap two operands. Used during GVN to put commutative operands in + /// order. 
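// Illustrative example (not part of the original commit): swapOperands is what
// lets commutative operations reach a canonical operand order. For instance,
//   %a = add i32 %x, %y
//   %b = add i32 %y, %x
// both get their operand leaders put into a single, arbitrary-but-consistent
// pointer order before hashing, so the two instructions produce equal
// BasicExpressions and can fall into the same congruence class.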
+ void swapOperands(unsigned First, unsigned Second) { + std::swap(Operands[First], Operands[Second]); + } + + Value *getOperand(unsigned N) const { + assert(Operands && "Operands not allocated"); + assert(N < NumOperands && "Operand out of range"); + return Operands[N]; + } + + void setOperand(unsigned N, Value *V) { + assert(Operands && "Operands not allocated before setting"); + assert(N < NumOperands && "Operand out of range"); + Operands[N] = V; + } + + unsigned getNumOperands() const { return NumOperands; } + + typedef Value **op_iterator; + typedef Value *const *const_ops_iterator; + op_iterator ops_begin() { return Operands; } + op_iterator ops_end() { return Operands + NumOperands; } + const_ops_iterator ops_begin() const { return Operands; } + const_ops_iterator ops_end() const { return Operands + NumOperands; } + iterator_range operands() { + return iterator_range(ops_begin(), ops_end()); + } + iterator_range operands() const { + return iterator_range(ops_begin(), ops_end()); + } + + void ops_push_back(Value *Arg) { + assert(NumOperands < MaxOperands && "Tried to add too many operands"); + assert(Operands && "Operandss not allocated before pushing"); + Operands[NumOperands++] = Arg; + } + bool ops_empty() const { return getNumOperands() == 0; } + + void allocateOperands(RecyclerType &Recycler, BumpPtrAllocator &Allocator) { + assert(!Operands && "Operands already allocated"); + Operands = Recycler.allocate(RecyclerCapacity::get(MaxOperands), Allocator); + } + void deallocateOperands(RecyclerType &Recycler) { + Recycler.deallocate(RecyclerCapacity::get(MaxOperands), Operands); + } + + void setType(Type *T) { ValueType = T; } + Type *getType() const { return ValueType; } + + virtual bool equals(const Expression &Other) const override { + if (getOpcode() != Other.getOpcode()) + return false; + + const auto &OE = cast(Other); + if (getType() != OE.getType()) + return false; + if (NumOperands != OE.NumOperands) + return false; + if (!std::equal(ops_begin(), ops_end(), OE.ops_begin())) + return false; + return true; + } + + virtual hash_code getHashValue() const override { + return hash_combine(getExpressionType(), getOpcode(), ValueType, + hash_combine_range(ops_begin(), ops_end())); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeBasic, "; + + this->Expression::printInternal(OS, false); + OS << "operands = {"; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + OS << "[" << i << "] = "; + Operands[i]->printAsOperand(OS); + OS << " "; + } + OS << "} "; + } +}; + +class CallExpression final : public BasicExpression { +private: + CallInst *Call; + MemoryAccess *DefiningAccess; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_Call; + } + + CallExpression(unsigned NumOperands, CallInst *C, MemoryAccess *DA) + : BasicExpression(NumOperands, ET_Call), Call(C), + DefiningAccess(DA) {} + void operator=(const CallExpression &) = delete; + CallExpression(const CallExpression &) = delete; + CallExpression() = delete; + virtual ~CallExpression() override; + + virtual bool equals(const Expression &Other) const override { + if (!this->BasicExpression::equals(Other)) + return false; + const auto &OE = cast(Other); + return DefiningAccess == OE.DefiningAccess; + } + + virtual hash_code getHashValue() const override { + return hash_combine(this->BasicExpression::getHashValue(), DefiningAccess); + } + + // + // Debugging support + // + 
virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeCall, "; + this->BasicExpression::printInternal(OS, false); + OS << " represents call at " << Call; + } +}; + +class LoadExpression final : public BasicExpression { +private: + LoadInst *Load; + MemoryAccess *DefiningAccess; + unsigned Alignment; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_Load; + } + + LoadExpression(unsigned NumOperands, LoadInst *L, MemoryAccess *DA) + : LoadExpression(ET_Load, NumOperands, L, DA) {} + LoadExpression(enum ExpressionType EType, unsigned NumOperands, + LoadInst *L, MemoryAccess *DA) + : BasicExpression(NumOperands, EType), Load(L), DefiningAccess(DA) { + Alignment = L ? L->getAlignment() : 0; + } + void operator=(const LoadExpression &) = delete; + LoadExpression(const LoadExpression &) = delete; + LoadExpression() = delete; + virtual ~LoadExpression() override; + + LoadInst *getLoadInst() const { return Load; } + void setLoadInst(LoadInst *L) { Load = L; } + + MemoryAccess *getDefiningAccess() const { return DefiningAccess; } + void setDefiningAccess(MemoryAccess *MA) { DefiningAccess = MA; } + unsigned getAlignment() const { return Alignment; } + void setAlignment(unsigned Align) { Alignment = Align; } + + virtual bool equals(const Expression &Other) const override; + + virtual hash_code getHashValue() const override { + return hash_combine(getOpcode(), getType(), DefiningAccess, + hash_combine_range(ops_begin(), ops_end())); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeLoad, "; + this->BasicExpression::printInternal(OS, false); + OS << " represents Load at " << Load; + OS << " with DefiningAccess " << DefiningAccess; + } +}; + +class StoreExpression final : public BasicExpression { +private: + StoreInst *Store; + MemoryAccess *DefiningAccess; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_Store; + } + + StoreExpression(unsigned NumOperands, StoreInst *S, MemoryAccess *DA) + : BasicExpression(NumOperands, ET_Store), Store(S), + DefiningAccess(DA) {} + void operator=(const StoreExpression &) = delete; + StoreExpression(const StoreExpression &) = delete; + StoreExpression() = delete; + virtual ~StoreExpression() override; + + StoreInst *getStoreInst() const { return Store; } + MemoryAccess *getDefiningAccess() const { return DefiningAccess; } + + virtual bool equals(const Expression &Other) const override; + + virtual hash_code getHashValue() const override { + return hash_combine(getOpcode(), getType(), DefiningAccess, + hash_combine_range(ops_begin(), ops_end())); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeStore, "; + this->BasicExpression::printInternal(OS, false); + OS << " represents Store at " << Store; + } +}; + +class AggregateValueExpression final : public BasicExpression { +private: + unsigned MaxIntOperands; + unsigned NumIntOperands; + unsigned *IntOperands; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_AggregateValue; + } + + AggregateValueExpression(unsigned NumOperands, + unsigned NumIntOperands) + : BasicExpression(NumOperands, ET_AggregateValue), + MaxIntOperands(NumIntOperands), NumIntOperands(0), + IntOperands(nullptr) {} + + void 
operator=(const AggregateValueExpression &) = delete; + AggregateValueExpression(const AggregateValueExpression &) = delete; + AggregateValueExpression() = delete; + virtual ~AggregateValueExpression() override; + + typedef unsigned *int_arg_iterator; + typedef const unsigned *const_int_arg_iterator; + + int_arg_iterator int_ops_begin() { return IntOperands; } + int_arg_iterator int_ops_end() { return IntOperands + NumIntOperands; } + const_int_arg_iterator int_ops_begin() const { return IntOperands; } + const_int_arg_iterator int_ops_end() const { + return IntOperands + NumIntOperands; + } + unsigned int_ops_size() const { return NumIntOperands; } + bool int_ops_empty() const { return NumIntOperands == 0; } + void int_ops_push_back(unsigned IntOperand) { + assert(NumIntOperands < MaxIntOperands && + "Tried to add too many int operands"); + assert(IntOperands && "Operands not allocated before pushing"); + IntOperands[NumIntOperands++] = IntOperand; + } + + virtual void allocateIntOperands(BumpPtrAllocator &Allocator) { + assert(!IntOperands && "Operands already allocated"); + IntOperands = Allocator.Allocate(MaxIntOperands); + } + + virtual bool equals(const Expression &Other) const override { + if (!this->BasicExpression::equals(Other)) + return false; + const AggregateValueExpression &OE = cast(Other); + if (NumIntOperands != OE.NumIntOperands) + return false; + if (!std::equal(int_ops_begin(), int_ops_end(), OE.int_ops_begin())) + return false; + return true; + } + + virtual hash_code getHashValue() const override { + return hash_combine(this->BasicExpression::getHashValue(), + hash_combine_range(int_ops_begin(), int_ops_end())); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeAggregateValue, "; + this->BasicExpression::printInternal(OS, false); + OS << ", intoperands = {"; + for (unsigned i = 0, e = int_ops_size(); i != e; ++i) { + OS << "[" << i << "] = " << IntOperands[i] << " "; + } + OS << "}"; + } +}; + +class PHIExpression final : public BasicExpression { +private: + BasicBlock *BB; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_Phi; + } + + PHIExpression(unsigned NumOperands, BasicBlock *B) + : BasicExpression(NumOperands, ET_Phi), BB(B) {} + void operator=(const PHIExpression &) = delete; + PHIExpression(const PHIExpression &) = delete; + PHIExpression() = delete; + virtual ~PHIExpression() override; + + virtual bool equals(const Expression &Other) const override { + if (!this->BasicExpression::equals(Other)) + return false; + const PHIExpression &OE = cast(Other); + if (BB != OE.BB) + return false; + return true; + } + + virtual hash_code getHashValue() const override { + return hash_combine(this->BasicExpression::getHashValue(), BB); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypePhi, "; + this->BasicExpression::printInternal(OS, false); + OS << "bb = " << BB; + } +}; + +class VariableExpression final : public Expression { +private: + Value *VariableValue; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_Variable; + } + + VariableExpression(Value *V) + : Expression(ET_Variable), VariableValue(V) {} + void operator=(const VariableExpression &) = delete; + VariableExpression(const VariableExpression &) = delete; + VariableExpression() = delete; + + Value 
*getVariableValue() const { return VariableValue; } + void setVariableValue(Value *V) { VariableValue = V; } + virtual bool equals(const Expression &Other) const override { + const VariableExpression &OC = cast(Other); + if (VariableValue != OC.VariableValue) + return false; + return true; + } + + virtual hash_code getHashValue() const override { + return hash_combine(getExpressionType(), VariableValue->getType(), + VariableValue); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeVariable, "; + this->Expression::printInternal(OS, false); + OS << " variable = " << *VariableValue; + } +}; + +class ConstantExpression final : public Expression { +private: + Constant *ConstantValue; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_Constant; + } + + ConstantExpression() + : Expression(ET_Constant), ConstantValue(NULL) {} + ConstantExpression(Constant *constantValue) + : Expression(ET_Constant), ConstantValue(constantValue) {} + void operator=(const ConstantExpression &) = delete; + ConstantExpression(const ConstantExpression &) = delete; + + Constant *getConstantValue() const { return ConstantValue; } + void setConstantValue(Constant *V) { ConstantValue = V; } + + virtual bool equals(const Expression &Other) const override { + const ConstantExpression &OC = cast(Other); + return ConstantValue == OC.ConstantValue; + } + + virtual hash_code getHashValue() const override { + return hash_combine(getExpressionType(), ConstantValue->getType(), + ConstantValue); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeConstant, "; + this->Expression::printInternal(OS, false); + OS << " constant = " << *ConstantValue; + } +}; +} +} + +#endif diff --git a/llvm/include/llvm/Transforms/Scalar/NewGVN.h b/llvm/include/llvm/Transforms/Scalar/NewGVN.h new file mode 100644 index 0000000..d0425aa --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/NewGVN.h @@ -0,0 +1,28 @@ +//===----- NewGVN.h - Global Value Numbering Pass ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides the interface for LLVM's Global Value Numbering pass. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_NEWGVN_H +#define LLVM_TRANSFORMS_SCALAR_NEWGVN_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class NewGVNPass : public PassInfoMixin { +public: + /// \brief Run the pass over the function. 
+ PreservedAnalyses run(Function &F, AnalysisManager &AM); +}; +} + +#endif // LLVM_TRANSFORMS_SCALAR_NEWGVN_H + diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index 7686b87..56df77f 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -39,6 +39,7 @@ add_llvm_library(LLVMScalarOpts MemCpyOptimizer.cpp MergedLoadStoreMotion.cpp NaryReassociate.cpp + NewGVN.cpp PartiallyInlineLibCalls.cpp PlaceSafepoints.cpp Reassociate.cpp diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp new file mode 100644 index 0000000..9a637e1 --- /dev/null +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -0,0 +1,1853 @@ +//===---- NewGVN.cpp - Global Value Numbering Pass --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the new LLVM's Global Value Numbering pass. +/// GVN partitions values computed by a function into congruence classes. +/// Values ending up in the same congruence class are guaranteed to be the same +/// for every execution of the program. In that respect, congruency is a +/// compile-time approximation of equivalence of values at runtime. +/// The algorithm implemented here uses a sparse formulation and it's based +/// on the ideas described in the paper: +/// "A Sparse Algorithm for Predicated Global Value Numbering" from +/// Karthik Gargi. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/NewGVN.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CFGPrinter.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/PredIteratorCache.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVNExpression.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/MemorySSA.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include +#include 
+#include +using namespace llvm; +using namespace PatternMatch; +using namespace llvm::GVNExpression; + +#define DEBUG_TYPE "newgvn" + +STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted"); +STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted"); +STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified"); +STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same"); + +//===----------------------------------------------------------------------===// +// GVN Pass +//===----------------------------------------------------------------------===// + +// Anchor methods. +namespace llvm { +namespace GVNExpression { + Expression::~Expression() = default; + BasicExpression::~BasicExpression() = default; + CallExpression::~CallExpression() = default; + LoadExpression::~LoadExpression() = default; + StoreExpression::~StoreExpression() = default; + AggregateValueExpression::~AggregateValueExpression() = default; + PHIExpression::~PHIExpression() = default; +} +} + +// Congruence classes represent the set of expressions/instructions +// that are all the same *during some scope in the function*. +// That is, because of the way we perform equality propagation, and +// because of memory value numbering, it is not correct to assume +// you can willy-nilly replace any member with any other at any +// point in the function. +// +// For any Value in the Member set, it is valid to replace any dominated member +// with that Value. +// +// Every congruence class has a leader, and the leader is used to +// symbolize instructions in a canonical way (IE every operand of an +// instruction that is a member of the same congruence class will +// always be replaced with leader during symbolization). +// To simplify symbolization, we keep the leader as a constant if class can be +// proved to be a constant value. +// Otherwise, the leader is a randomly chosen member of the value set, it does +// not matter which one is chosen. +// Each congruence class also has a defining expression, +// though the expression may be null. If it exists, it can be used for forward +// propagation and reassociation of values. +// +struct CongruenceClass { + typedef SmallPtrSet MemberSet; + unsigned ID; + // Representative leader. + Value *RepLeader; + // Defining Expression. + const Expression *DefiningExpr; + // Actual members of this class. + MemberSet Members; + + // True if this class has no members left. This is mainly used for assertion + // purposes, and for skipping empty classes. 
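// Illustrative example (not part of the original commit), using a contrived
// two-block function:
//   entry:
//     %a = add i32 %x, %y
//     br label %next
//   next:
//     %b = add i32 %x, %y
// Both adds symbolize to the same expression, so they join one congruence
// class. %a (or a constant, if the whole class can be proven constant) serves
// as the leader, and because %a dominates %b, uses of %b may be rewritten to
// the leader during elimination.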
+ bool Dead; + + explicit CongruenceClass(unsigned ID) + : ID(ID), RepLeader(0), DefiningExpr(0), Dead(false) {} + CongruenceClass(unsigned ID, Value *Leader, const Expression *E) + : ID(ID), RepLeader(Leader), DefiningExpr(E), Dead(false) {} +}; + +namespace llvm { + template <> struct DenseMapInfo { + static const Expression *getEmptyKey() { + uintptr_t Val = static_cast(-1); + Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; + return reinterpret_cast(Val); + } + static const Expression *getTombstoneKey() { + uintptr_t Val = static_cast(~1U); + Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; + return reinterpret_cast(Val); + } + static unsigned getHashValue(const Expression *V) { + return static_cast(V->getHashValue()); + } + static bool isEqual(const Expression *LHS, const Expression *RHS) { + if (LHS == RHS) + return true; + if (LHS == getTombstoneKey() || RHS == getTombstoneKey() || + LHS == getEmptyKey() || RHS == getEmptyKey()) + return false; + return *LHS == *RHS; + } + }; +} // end namespace llvm + +class NewGVN : public FunctionPass { + DominatorTree *DT; + const DataLayout *DL; + const TargetLibraryInfo *TLI; + AssumptionCache *AC; + AliasAnalysis *AA; + MemorySSA *MSSA; + MemorySSAWalker *MSSAWalker; + BumpPtrAllocator ExpressionAllocator; + ArrayRecycler ArgRecycler; + + // Congruence class info. + CongruenceClass *InitialClass; + std::vector CongruenceClasses; + unsigned NextCongruenceNum; + + // Value Mappings. + DenseMap ValueToClass; + DenseMap ValueToExpression; + + // Expression to class mapping. + typedef DenseMap ExpressionClassMap; + ExpressionClassMap ExpressionToClass; + + // Which values have changed as a result of leader changes. + SmallPtrSet ChangedValues; + + // Reachability info. + typedef BasicBlockEdge BlockEdge; + DenseSet ReachableEdges; + SmallPtrSet ReachableBlocks; + + // This is a bitvector because, on larger functions, we may have + // thousands of touched instructions at once (entire blocks, + // instructions with hundreds of uses, etc). Even with optimization + // for when we mark whole blocks as touched, when this was a + // SmallPtrSet or DenseSet, for some functions, we spent >20% of all + // the time in GVN just managing this list. The bitvector, on the + // other hand, efficiently supports test/set/clear of both + // individual and ranges, as well as "find next element" This + // enables us to use it as a worklist with essentially 0 cost. + BitVector TouchedInstructions; + + DenseMap> BlockInstRange; + DenseMap> + DominatedInstRange; + +#ifndef NDEBUG + // Debugging for how many times each block and instruction got processed. + DenseMap ProcessedCount; +#endif + + // DFS info. + DenseMap> DFSDomMap; + DenseMap InstrDFS; + std::vector DFSToInstr; + + // Deletion info. + SmallPtrSet InstructionsToErase; + +public: + static char ID; // Pass identification, replacement for typeid. + NewGVN() : FunctionPass(ID) { + initializeNewGVNPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + bool runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, + TargetLibraryInfo *TLI, AliasAnalysis *AA, + MemorySSA *MSSA); + +private: + // This transformation requires dominator postdominator info. + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + + AU.addPreserved(); + AU.addPreserved(); + } + + // Expression handling. 
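// Illustrative note (not part of the original commit): these factories build
// the GVNExpression objects that instructions are value-numbered under. For
// example, createExpression on "%c = add i32 %a, %b" yields a BasicExpression
// with opcode Add, type i32, and the congruence-class *leaders* of %a and %b
// as operands, so two adds over congruent operands compare and hash equal
// even if their textual operands differ.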
+ const Expression *createExpression(Instruction *, const BasicBlock *); + const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *, + const BasicBlock *); + PHIExpression *createPHIExpression(Instruction *); + const VariableExpression *createVariableExpression(Value *); + const ConstantExpression *createConstantExpression(Constant *); + const Expression *createVariableOrConstant(Value *V, const BasicBlock *B); + const StoreExpression *createStoreExpression(StoreInst *, MemoryAccess *, + const BasicBlock *); + LoadExpression *createLoadExpression(Type *, Value *, LoadInst *, + MemoryAccess *, const BasicBlock *); + + const CallExpression *createCallExpression(CallInst *, MemoryAccess *, + const BasicBlock *); + const AggregateValueExpression * + createAggregateValueExpression(Instruction *, const BasicBlock *); + bool setBasicExpressionInfo(Instruction *, BasicExpression *, + const BasicBlock *); + + // Congruence class handling. + CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) { + CongruenceClass *result = + new CongruenceClass(NextCongruenceNum++, Leader, E); + CongruenceClasses.emplace_back(result); + return result; + } + + CongruenceClass *createSingletonCongruenceClass(Value *Member) { + CongruenceClass *CClass = createCongruenceClass(Member, NULL); + CClass->Members.insert(Member); + ValueToClass[Member] = CClass; + return CClass; + } + void initializeCongruenceClasses(Function &F); + + // Symbolic evaluation. + const Expression *checkSimplificationResults(Expression *, Instruction *, + Value *); + const Expression *performSymbolicEvaluation(Value *, const BasicBlock *); + const Expression *performSymbolicLoadEvaluation(Instruction *, + const BasicBlock *); + const Expression *performSymbolicStoreEvaluation(Instruction *, + const BasicBlock *); + const Expression *performSymbolicCallEvaluation(Instruction *, + const BasicBlock *); + const Expression *performSymbolicPHIEvaluation(Instruction *, + const BasicBlock *); + const Expression *performSymbolicAggrValueEvaluation(Instruction *, + const BasicBlock *); + + // Congruence finding. + // Templated to allow them to work both on BB's and BB-edges. + template + Value *lookupOperandLeader(Value *, const User *, const T &) const; + void performCongruenceFinding(Value *, const Expression *); + + // Reachability handling. + void updateReachableEdge(BasicBlock *, BasicBlock *); + void processOutgoingEdges(TerminatorInst *, BasicBlock *); + bool isOnlyReachableViaThisEdge(const BasicBlockEdge &); + Value *findConditionEquivalence(Value *, BasicBlock *) const; + + // Elimination. + struct ValueDFS; + void convertDenseToDFSOrdered(CongruenceClass::MemberSet &, + std::vector &); + + bool eliminateInstructions(Function &); + void replaceInstruction(Instruction *, Value *); + void markInstructionForDeletion(Instruction *); + void deleteInstructionsInBlock(BasicBlock *); + + // New instruction creation. + void handleNewInstruction(Instruction *){}; + void markUsersTouched(Value *); + void markMemoryUsersTouched(MemoryAccess *); + + // Utilities. + void cleanupTables(); + std::pair assignDFSNumbers(BasicBlock *, unsigned); + void updateProcessedCount(Value *V); +}; + +char NewGVN::ID = 0; + +// createGVNPass - The public interface to this file. 
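// Illustrative usage sketch (not part of the original commit): clients obtain
// the pass through this factory, or through the C API entry point
// LLVMAddNewGVNPass added earlier in this patch, e.g.:
//   legacy::PassManager PM;
//   PM.add(createNewGVNPass());
//   PM.run(M);               // M is some llvm::Module to optimize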
+FunctionPass *llvm::createNewGVNPass() { return new NewGVN(); } + +bool LoadExpression::equals(const Expression &Other) const { + if (!isa(Other) && !isa(Other)) + return false; + if (!this->BasicExpression::equals(Other)) + return false; + if (const auto *OtherL = dyn_cast(&Other)) { + if (DefiningAccess != OtherL->getDefiningAccess()) + return false; + } else if (const auto *OtherS = dyn_cast(&Other)) { + if (DefiningAccess != OtherS->getDefiningAccess()) + return false; + } + + return true; +} + +bool StoreExpression::equals(const Expression &Other) const { + if (!isa(Other) && !isa(Other)) + return false; + if (!this->BasicExpression::equals(Other)) + return false; + if (const auto *OtherL = dyn_cast(&Other)) { + if (DefiningAccess != OtherL->getDefiningAccess()) + return false; + } else if (const auto *OtherS = dyn_cast(&Other)) { + if (DefiningAccess != OtherS->getDefiningAccess()) + return false; + } + + return true; +} + +#ifndef NDEBUG +static std::string getBlockName(const BasicBlock *B) { + return DOTGraphTraits::getSimpleNodeLabel(B, NULL); +} +#endif + +INITIALIZE_PASS_BEGIN(NewGVN, "newgvn", "Global Value Numbering", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false) + +PHIExpression *NewGVN::createPHIExpression(Instruction *I) { + BasicBlock *PhiBlock = I->getParent(); + PHINode *PN = cast(I); + PHIExpression *E = new (ExpressionAllocator) + PHIExpression(PN->getNumOperands(), I->getParent()); + + E->allocateOperands(ArgRecycler, ExpressionAllocator); + E->setType(I->getType()); + E->setOpcode(I->getOpcode()); + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + BasicBlock *B = PN->getIncomingBlock(i); + if (!ReachableBlocks.count(B)) { + DEBUG(dbgs() << "Skipping unreachable block " << getBlockName(B) + << " in PHI node " << *PN << "\n"); + continue; + } + if (I->getOperand(i) != I) { + const BasicBlockEdge BBE(B, PhiBlock); + auto Operand = lookupOperandLeader(I->getOperand(i), I, BBE); + E->ops_push_back(Operand); + } else { + E->ops_push_back(I->getOperand(i)); + } + } + return E; +} + +// Set basic expression info (Arguments, type, opcode) for Expression +// E from Instruction I in block B. +bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E, + const BasicBlock *B) { + bool AllConstant = true; + if (auto *GEP = dyn_cast(I)) + E->setType(GEP->getSourceElementType()); + else + E->setType(I->getType()); + E->setOpcode(I->getOpcode()); + E->allocateOperands(ArgRecycler, ExpressionAllocator); + + for (auto &O : I->operands()) { + auto Operand = lookupOperandLeader(O, I, B); + if (!isa(Operand)) + AllConstant = false; + E->ops_push_back(Operand); + } + return AllConstant; +} + +const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, + Value *Arg1, Value *Arg2, + const BasicBlock *B) { + BasicExpression *E = new (ExpressionAllocator) BasicExpression(2); + + E->setType(T); + E->setOpcode(Opcode); + E->allocateOperands(ArgRecycler, ExpressionAllocator); + if (Instruction::isCommutative(Opcode)) { + // Ensure that commutative instructions that only differ by a permutation + // of their operands get the same value number by sorting the operand value + // numbers. 
Since all commutative instructions have two operands it is more + // efficient to sort by hand rather than using, say, std::sort. + if (Arg1 > Arg2) + std::swap(Arg1, Arg2); + } + E->ops_push_back(lookupOperandLeader(Arg1, nullptr, B)); + E->ops_push_back(lookupOperandLeader(Arg2, nullptr, B)); + + Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), *DL, TLI, + DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V)) + return SimplifiedE; + return E; +} + +// Take a Value returned by simplification of Expression E/Instruction +// I, and see if it resulted in a simpler expression. If so, return +// that expression. +// TODO: Once finished, this should not take an Instruction, we only +// use it for printing. +const Expression *NewGVN::checkSimplificationResults(Expression *E, + Instruction *I, Value *V) { + if (!V) + return nullptr; + if (auto *C = dyn_cast(V)) { + if (I) + DEBUG(dbgs() << "Simplified " << *I << " to " + << " constant " << *C << "\n"); + NumGVNOpsSimplified++; + assert(isa(E) && + "We should always have had a basic expression here"); + + cast(E)->deallocateOperands(ArgRecycler); + ExpressionAllocator.Deallocate(E); + return createConstantExpression(C); + } else if (isa(V) || isa(V)) { + if (I) + DEBUG(dbgs() << "Simplified " << *I << " to " + << " variable " << *V << "\n"); + cast(E)->deallocateOperands(ArgRecycler); + ExpressionAllocator.Deallocate(E); + return createVariableExpression(V); + } + + CongruenceClass *CC = ValueToClass.lookup(V); + if (CC && CC->DefiningExpr) { + if (I) + DEBUG(dbgs() << "Simplified " << *I << " to " + << " expression " << *V << "\n"); + NumGVNOpsSimplified++; + assert(isa(E) && + "We should always have had a basic expression here"); + cast(E)->deallocateOperands(ArgRecycler); + ExpressionAllocator.Deallocate(E); + return CC->DefiningExpr; + } + return nullptr; +} + +const Expression *NewGVN::createExpression(Instruction *I, + const BasicBlock *B) { + + BasicExpression *E = + new (ExpressionAllocator) BasicExpression(I->getNumOperands()); + + bool AllConstant = setBasicExpressionInfo(I, E, B); + + if (I->isCommutative()) { + // Ensure that commutative instructions that only differ by a permutation + // of their operands get the same value number by sorting the operand value + // numbers. Since all commutative instructions have two operands it is more + // efficient to sort by hand rather than using, say, std::sort. + assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); + if (E->getOperand(0) > E->getOperand(1)) + E->swapOperands(0, 1); + } + + // Perform simplificaiton + // TODO: Right now we only check to see if we get a constant result. + // We may get a less than constant, but still better, result for + // some operations. + // IE + // add 0, x -> x + // and x, x -> x + // We should handle this by simply rewriting the expression. + if (auto *CI = dyn_cast(I)) { + // Sort the operand value numbers so xx get the same value + // number. + CmpInst::Predicate Predicate = CI->getPredicate(); + if (E->getOperand(0) > E->getOperand(1)) { + E->swapOperands(0, 1); + Predicate = CmpInst::getSwappedPredicate(Predicate); + } + E->setOpcode((CI->getOpcode() << 8) | Predicate); + // TODO: 25% of our time is spent in SimplifyCmpInst with pointer operands + // TODO: Since we noop bitcasts, we may need to check types before + // simplifying, so that we don't end up simplifying based on a wrong + // type assumption. 
We should clean this up so we can use constants of the + // wrong type + + assert(I->getOperand(0)->getType() == I->getOperand(1)->getType() && + "Wrong types on cmp instruction"); + if ((E->getOperand(0)->getType() == I->getOperand(0)->getType() && + E->getOperand(1)->getType() == I->getOperand(1)->getType())) { + Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), + *DL, TLI, DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; + } + } else if (isa(I)) { + if (isa(E->getOperand(0)) || + (E->getOperand(1)->getType() == I->getOperand(1)->getType() && + E->getOperand(2)->getType() == I->getOperand(2)->getType())) { + Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1), + E->getOperand(2), *DL, TLI, DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; + } + } else if (I->isBinaryOp()) { + Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), + *DL, TLI, DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; + } else if (auto *BI = dyn_cast(I)) { + Value *V = SimplifyInstruction(BI, *DL, TLI, DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; + } else if (isa(I)) { + Value *V = SimplifyGEPInst(E->getType(), + ArrayRef(E->ops_begin(), E->ops_end()), + *DL, TLI, DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; + } else if (AllConstant) { + // We don't bother trying to simplify unless all of the operands + // were constant. + // TODO: There are a lot of Simplify*'s we could call here, if we + // wanted to. The original motivating case for this code was a + // zext i1 false to i8, which we don't have an interface to + // simplify (IE there is no SimplifyZExt). 
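// Illustrative example (not part of the original commit): for the motivating
// case mentioned above,
//   %z = zext i1 false to i8
// all operands are constant, so ConstantFoldInstOperands below folds the
// whole instruction to "i8 0", and checkSimplificationResults replaces the
// expression with a ConstantExpression for that constant.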
+ + SmallVector C; + for (Value *Arg : E->operands()) + C.emplace_back(cast(Arg)); + + if (Value *V = ConstantFoldInstOperands(I, C, *DL, TLI)) + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; + } + return E; +} + +const AggregateValueExpression * +NewGVN::createAggregateValueExpression(Instruction *I, const BasicBlock *B) { + if (auto *II = dyn_cast(I)) { + AggregateValueExpression *E = new (ExpressionAllocator) + AggregateValueExpression(I->getNumOperands(), II->getNumIndices()); + setBasicExpressionInfo(I, E, B); + E->allocateIntOperands(ExpressionAllocator); + + for (auto &Index : II->indices()) + E->int_ops_push_back(Index); + return E; + + } else if (auto *EI = dyn_cast(I)) { + AggregateValueExpression *E = new (ExpressionAllocator) + AggregateValueExpression(I->getNumOperands(), EI->getNumIndices()); + setBasicExpressionInfo(EI, E, B); + E->allocateIntOperands(ExpressionAllocator); + + for (auto &Index : EI->indices()) + E->int_ops_push_back(Index); + return E; + } + llvm_unreachable("Unhandled type of aggregate value operation"); +} + +const VariableExpression * +NewGVN::createVariableExpression(Value *V) { + VariableExpression *E = new (ExpressionAllocator) VariableExpression(V); + E->setOpcode(V->getValueID()); + return E; +} + +const Expression *NewGVN::createVariableOrConstant(Value *V, + const BasicBlock *B) { + auto Leader = lookupOperandLeader(V, nullptr, B); + if (auto *C = dyn_cast(Leader)) + return createConstantExpression(C); + return createVariableExpression(Leader); +} + +const ConstantExpression * +NewGVN::createConstantExpression(Constant *C) { + ConstantExpression *E = new (ExpressionAllocator) ConstantExpression(C); + E->setOpcode(C->getValueID()); + return E; +} + +const CallExpression *NewGVN::createCallExpression(CallInst *CI, + MemoryAccess *HV, + const BasicBlock *B) { + // FIXME: Add operand bundles for calls. + CallExpression *E = + new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, HV); + setBasicExpressionInfo(CI, E, B); + return E; +} + +// See if we have a congruence class and leader for this operand, and if so, +// return it. Otherwise, return the operand itself. +template +Value *NewGVN::lookupOperandLeader(Value *V, const User *U, + const T &B) const { + CongruenceClass *CC = ValueToClass.lookup(V); + if (CC && (CC != InitialClass)) + return CC->RepLeader; + return V; +} + +LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, + LoadInst *LI, MemoryAccess *DA, + const BasicBlock *B) { + LoadExpression *E = new (ExpressionAllocator) LoadExpression(1, LI, DA); + E->allocateOperands(ArgRecycler, ExpressionAllocator); + E->setType(LoadType); + + // Give store and loads same opcode so they value number together. + E->setOpcode(0); + auto Operand = lookupOperandLeader(PointerOp, LI, B); + E->ops_push_back(Operand); + if (LI) + E->setAlignment(LI->getAlignment()); + + // TODO: Value number heap versions. We may be able to discover + // things alias analysis can't on it's own (IE that a store and a + // load have the same value, and thus, it isn't clobbering the load). + return E; +} + +const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, + MemoryAccess *DA, + const BasicBlock *B) { + StoreExpression *E = + new (ExpressionAllocator) StoreExpression(SI->getNumOperands(), SI, DA); + E->allocateOperands(ArgRecycler, ExpressionAllocator); + E->setType(SI->getValueOperand()->getType()); + + // Give store and loads same opcode so they value number together. 
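// Illustrative example (not part of the original commit): because load and
// store expressions share opcode 0 and compare on the pointer operand's
// leader plus the MemorySSA defining access,
//   store i32 %v, i32* %p
//   %l = load i32, i32* %p
// can produce equal expressions when the store is the load's defining access;
// they then land in the same congruence class, the groundwork for the
// load/store value numbering mentioned in the commit message.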
+  E->setOpcode(0);
+  E->ops_push_back(lookupOperandLeader(SI->getPointerOperand(), SI, B));
+
+  // TODO: Value number heap versions. We may be able to discover
+  // things alias analysis can't on its own (IE that a store and a
+  // load have the same value, and thus, it isn't clobbering the load).
+  return E;
+}
+
+const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
+                                                         const BasicBlock *B) {
+  StoreInst *SI = cast<StoreInst>(I);
+  const Expression *E = createStoreExpression(SI, MSSA->getMemoryAccess(SI), B);
+  return E;
+}
+
+const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
+                                                        const BasicBlock *B) {
+  LoadInst *LI = cast<LoadInst>(I);
+
+  // We can eliminate other loads in favor of non-simple (volatile or atomic)
+  // loads, but we won't be able to eliminate the non-simple loads themselves.
+  if (!LI->isSimple())
+    return nullptr;
+
+  Value *LoadAddressLeader =
+      lookupOperandLeader(LI->getPointerOperand(), I, B);
+  // Load of undef is undef.
+  if (isa<UndefValue>(LoadAddressLeader))
+    return createConstantExpression(UndefValue::get(LI->getType()));
+
+  MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
+
+  if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
+    if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
+      Instruction *DefiningInst = MD->getMemoryInst();
+      // If the defining instruction is not reachable, replace with undef.
+      if (!ReachableBlocks.count(DefiningInst->getParent()))
+        return createConstantExpression(UndefValue::get(LI->getType()));
+    }
+  }
+
+  const Expression *E = createLoadExpression(
+      LI->getType(), LI->getPointerOperand(), LI, DefiningAccess, B);
+  return E;
+}
+
+// Evaluate read-only and pure calls, and create an expression result.
+const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I,
+                                                        const BasicBlock *B) {
+  CallInst *CI = cast<CallInst>(I);
+  if (AA->doesNotAccessMemory(CI))
+    return createCallExpression(CI, nullptr, B);
+  else if (AA->onlyReadsMemory(CI))
+    return createCallExpression(CI, MSSAWalker->getClobberingMemoryAccess(CI),
+                                B);
+  else
+    return nullptr;
+}
+
+// Evaluate PHI nodes symbolically, and create an expression result.
+const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
+                                                       const BasicBlock *B) {
+  PHIExpression *E = cast<PHIExpression>(createPHIExpression(I));
+  if (E->ops_empty()) {
+    DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef"
+                 << "\n");
+    E->deallocateOperands(ArgRecycler);
+    ExpressionAllocator.Deallocate(E);
+    return createConstantExpression(UndefValue::get(I->getType()));
+  }
+
+  Value *AllSameValue = E->getOperand(0);
+
+  // See if all arguments are the same, ignoring undef arguments, because we
+  // can choose a value that is the same for them.
+  for (const Value *Arg : E->operands())
+    if (Arg != AllSameValue && !isa<UndefValue>(Arg)) {
+      AllSameValue = NULL;
+      break;
+    }
+
+  if (AllSameValue) {
+    // It's possible to have phi nodes with cycles (IE dependent on
+    // other phis that are .... dependent on the original phi node),
+    // especially in weird CFGs where some arguments are unreachable, or
+    // uninitialized along certain paths.
+    // This can cause infinite loops during evaluation (even if you disable
+    // the recursion below, you will simply ping-pong between congruence
+    // classes). If a phi node symbolically evaluates to another phi node,
+    // just leave it alone. If they are really the same, we will still
+    // eliminate them in favor of each other.
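+    // For example (hypothetical IR), with mutually dependent phis such as
+    //   %a = phi i32 [ %b, %bb1 ], [ undef, %bb2 ]
+    //   %b = phi i32 [ %a, %bb3 ], [ undef, %bb4 ]
+    // %a evaluates to %b and %b back to %a, so once the common value is
+    // itself a phi we simply keep the original expression.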
+    if (isa<PHINode>(AllSameValue))
+      return E;
+    NumGVNPhisAllSame++;
+    DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue
+                 << "\n");
+    E->deallocateOperands(ArgRecycler);
+    ExpressionAllocator.Deallocate(E);
+    if (auto *C = dyn_cast<Constant>(AllSameValue))
+      return createConstantExpression(C);
+    return createVariableExpression(AllSameValue);
+  }
+  return E;
+}
+
+const Expression *
+NewGVN::performSymbolicAggrValueEvaluation(Instruction *I,
+                                           const BasicBlock *B) {
+  if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
+    auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
+    if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
+      unsigned Opcode = 0;
+      // EI might be an extract from one of our recognised intrinsics. If it
+      // is, we'll synthesize a semantically equivalent expression instead of
+      // an extractvalue expression.
+      switch (II->getIntrinsicID()) {
+      case Intrinsic::sadd_with_overflow:
+      case Intrinsic::uadd_with_overflow:
+        Opcode = Instruction::Add;
+        break;
+      case Intrinsic::ssub_with_overflow:
+      case Intrinsic::usub_with_overflow:
+        Opcode = Instruction::Sub;
+        break;
+      case Intrinsic::smul_with_overflow:
+      case Intrinsic::umul_with_overflow:
+        Opcode = Instruction::Mul;
+        break;
+      default:
+        break;
+      }
+
+      if (Opcode != 0) {
+        // Intrinsic recognized. Grab its args to finish building the
+        // expression.
+        assert(II->getNumArgOperands() == 2 &&
+               "Expect two args for recognised intrinsics.");
+        return createBinaryExpression(Opcode, EI->getType(),
+                                      II->getArgOperand(0),
+                                      II->getArgOperand(1), B);
+      }
+    }
+  }
+
+  return createAggregateValueExpression(I, B);
+}
+
+// Substitute and symbolize the value before value numbering.
+const Expression *NewGVN::performSymbolicEvaluation(Value *V,
+                                                    const BasicBlock *B) {
+  const Expression *E = NULL;
+  if (auto *C = dyn_cast<Constant>(V))
+    E = createConstantExpression(C);
+  else if (isa<Argument>(V) || isa<GlobalVariable>(V)) {
+    E = createVariableExpression(V);
+  } else {
+    // TODO: memory intrinsics.
+    // TODO: Some day, we should do the forward propagation and reassociation
+    // parts of the algorithm.
+ Instruction *I = cast(V); + switch (I->getOpcode()) { + case Instruction::ExtractValue: + case Instruction::InsertValue: + E = performSymbolicAggrValueEvaluation(I, B); + break; + case Instruction::PHI: + E = performSymbolicPHIEvaluation(I, B); + break; + case Instruction::Call: + E = performSymbolicCallEvaluation(I, B); + break; + case Instruction::Store: + E = performSymbolicStoreEvaluation(I, B); + break; + case Instruction::Load: + E = performSymbolicLoadEvaluation(I, B); + break; + case Instruction::BitCast: { + E = createExpression(I, B); + } break; + + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::Select: + case Instruction::ExtractElement: + case Instruction::InsertElement: + case Instruction::ShuffleVector: + case Instruction::GetElementPtr: + E = createExpression(I, B); + break; + default: + return nullptr; + } + } + if (!E) + return nullptr; + return E; +} + +// There is an edge from 'Src' to 'Dst'. Return true if every path from +// the entry block to 'Dst' passes via this edge. In particular 'Dst' +// must not be reachable via another edge from 'Src'. +bool NewGVN::isOnlyReachableViaThisEdge(const BasicBlockEdge &E) { + + // While in theory it is interesting to consider the case in which Dst has + // more than one predecessor, because Dst might be part of a loop which is + // only reachable from Src, in practice it is pointless since at the time + // GVN runs all such loops have preheaders, which means that Dst will have + // been changed to have only one predecessor, namely Src. + const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); + const BasicBlock *Src = E.getStart(); + assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); + (void)Src; + return Pred != nullptr; +} + +void NewGVN::markUsersTouched(Value *V) { + // Now mark the users as touched. + for (auto &U : V->uses()) { + auto *User = dyn_cast(U.getUser()); + assert(User && "Use of value not within an instruction?"); + TouchedInstructions.set(InstrDFS[User]); + } +} + +void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) { + for (auto U : MA->users()) { + if (auto *MUD = dyn_cast(U)) + TouchedInstructions.set(InstrDFS[MUD->getMemoryInst()]); + else + TouchedInstructions.set(InstrDFS[MA]); + } +} + +// Perform congruence finding on a given value numbering expression. +void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { + + ValueToExpression[V] = E; + // This is guaranteed to return something, since it will at least find + // INITIAL. + CongruenceClass *VClass = ValueToClass[V]; + assert(VClass && "Should have found a vclass"); + // Dead classes should have been eliminated from the mapping. 
+ assert(!VClass->Dead && "Found a dead class"); + + CongruenceClass *EClass; + // Expressions we can't symbolize are always in their own unique + // congruence class. + if (E == NULL) { + // We may have already made a unique class. + if (VClass->Members.size() != 1 || VClass->RepLeader != V) { + CongruenceClass *NewClass = createCongruenceClass(V, NULL); + // We should always be adding the member in the below code. + EClass = NewClass; + DEBUG(dbgs() << "Created new congruence class for " << *V + << " due to NULL expression\n"); + } else { + EClass = VClass; + } + } else if (const auto *VE = dyn_cast(E)) { + EClass = ValueToClass[VE->getVariableValue()]; + } else { + auto lookupResult = ExpressionToClass.insert({E, nullptr}); + + // If it's not in the value table, create a new congruence class. + if (lookupResult.second) { + CongruenceClass *NewClass = createCongruenceClass(NULL, E); + auto place = lookupResult.first; + place->second = NewClass; + + // Constants and variables should always be made the leader. + if (const auto *CE = dyn_cast(E)) + NewClass->RepLeader = CE->getConstantValue(); + else if (const auto *VE = dyn_cast(E)) + NewClass->RepLeader = VE->getVariableValue(); + else if (const auto *SE = dyn_cast(E)) + NewClass->RepLeader = SE->getStoreInst()->getValueOperand(); + else + NewClass->RepLeader = V; + + EClass = NewClass; + DEBUG(dbgs() << "Created new congruence class for " << *V + << " using expression " << *E << " at " << NewClass->ID + << "\n"); + DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n"); + } else { + EClass = lookupResult.first->second; + assert(EClass && "Somehow don't have an eclass"); + + assert(!EClass->Dead && "We accidentally looked up a dead class"); + } + } + bool WasInChanged = ChangedValues.erase(V); + if (VClass != EClass || WasInChanged) { + DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E + << "\n"); + + if (VClass != EClass) { + DEBUG(dbgs() << "New congruence class for " << V << " is " << EClass->ID + << "\n"); + + VClass->Members.erase(V); + EClass->Members.insert(V); + ValueToClass[V] = EClass; + // See if we destroyed the class or need to swap leaders. + if (VClass->Members.empty() && VClass != InitialClass) { + if (VClass->DefiningExpr) { + VClass->Dead = true; + DEBUG(dbgs() << "Erasing expression " << *E << " from table\n"); + ExpressionToClass.erase(VClass->DefiningExpr); + } + } else if (VClass->RepLeader == V) { + // FIXME: When the leader changes, the value numbering of + // everything may change, so we need to reprocess. + VClass->RepLeader = *(VClass->Members.begin()); + for (auto M : VClass->Members) { + if (auto *I = dyn_cast(M)) + TouchedInstructions.set(InstrDFS[I]); + ChangedValues.insert(M); + } + } + } + markUsersTouched(V); + if (Instruction *I = dyn_cast(V)) + if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + markMemoryUsersTouched(MA); + } +} + +// Process the fact that Edge (from, to) is reachable, including marking +// any newly reachable blocks and instructions for processing. +void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { + // Check if the Edge was reachable before. + if (ReachableEdges.insert({From, To}).second) { + // If this block wasn't reachable before, all instructions are touched. 
+ if (ReachableBlocks.insert(To).second) { + DEBUG(dbgs() << "Block " << getBlockName(To) << " marked reachable\n"); + const auto &InstRange = BlockInstRange.lookup(To); + TouchedInstructions.set(InstRange.first, InstRange.second); + } else { + DEBUG(dbgs() << "Block " << getBlockName(To) + << " was reachable, but new edge {" << getBlockName(From) + << "," << getBlockName(To) << "} to it found\n"); + + // We've made an edge reachable to an existing block, which may + // impact predicates. Otherwise, only mark the phi nodes as touched, as + // they are the only thing that depend on new edges. Anything using their + // values will get propagated to if necessary. + auto BI = To->begin(); + while (isa(BI)) { + TouchedInstructions.set(InstrDFS[&*BI]); + ++BI; + } + } + } +} + +// Given a predicate condition (from a switch, cmp, or whatever) and a block, +// see if we know some constant value for it already. +Value *NewGVN::findConditionEquivalence(Value *Cond, BasicBlock *B) const { + auto Result = lookupOperandLeader(Cond, nullptr, B); + if (isa(Result)) + return Result; + return nullptr; +} + +// Process the outgoing edges of a block for reachability. +void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { + // Evaluate reachability of terminator instruction. + BranchInst *BR; + if ((BR = dyn_cast(TI)) && BR->isConditional()) { + Value *Cond = BR->getCondition(); + Value *CondEvaluated = findConditionEquivalence(Cond, B); + if (!CondEvaluated) { + if (auto *I = dyn_cast(Cond)) { + const Expression *E = createExpression(I, B); + if (const auto *CE = dyn_cast(E)) { + CondEvaluated = CE->getConstantValue(); + } + } else if (isa(Cond)) { + CondEvaluated = Cond; + } + } + ConstantInt *CI; + BasicBlock *TrueSucc = BR->getSuccessor(0); + BasicBlock *FalseSucc = BR->getSuccessor(1); + if (CondEvaluated && (CI = dyn_cast(CondEvaluated))) { + if (CI->isOne()) { + DEBUG(dbgs() << "Condition for Terminator " << *TI + << " evaluated to true\n"); + updateReachableEdge(B, TrueSucc); + } else if (CI->isZero()) { + DEBUG(dbgs() << "Condition for Terminator " << *TI + << " evaluated to false\n"); + updateReachableEdge(B, FalseSucc); + } + } else { + updateReachableEdge(B, TrueSucc); + updateReachableEdge(B, FalseSucc); + } + } else if (auto *SI = dyn_cast(TI)) { + // For switches, propagate the case values into the case + // destinations. + + // Remember how many outgoing edges there are to every successor. + SmallDenseMap SwitchEdges; + + bool MultipleEdgesOneReachable = false; + Value *SwitchCond = SI->getCondition(); + Value *CondEvaluated = findConditionEquivalence(SwitchCond, B); + // See if we were able to turn this switch statement into a constant. + if (CondEvaluated && isa(CondEvaluated)) { + ConstantInt *CondVal = cast(CondEvaluated); + // We should be able to get case value for this. + auto CaseVal = SI->findCaseValue(CondVal); + if (CaseVal.getCaseSuccessor() == SI->getDefaultDest()) { + // We proved the value is outside of the range of the case. + // We can't do anything other than mark the default dest as reachable, + // and go home. + updateReachableEdge(B, SI->getDefaultDest()); + return; + } + // Now get where it goes and mark it reachable. + BasicBlock *TargetBlock = CaseVal.getCaseSuccessor(); + updateReachableEdge(B, TargetBlock); + unsigned WhichSucc = CaseVal.getSuccessorIndex(); + // Calculate whether our single reachable edge is really a single edge to + // the target block. If not, and the block has multiple predecessors, we + // can only replace phi node values. 
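+      // For example (hypothetical IR): in
+      //   switch i32 %x, label %def [ i32 0, label %t
+      //                               i32 1, label %t ]
+      // proving %x == 0 still leaves a second edge from this block to %t, so
+      // %t's phi operands are all we could safely replace.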
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { + if (i == WhichSucc) + continue; + BasicBlock *Block = SI->getSuccessor(i); + if (Block == TargetBlock) + MultipleEdgesOneReachable = true; + } + } else { + for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { + BasicBlock *TargetBlock = SI->getSuccessor(i); + ++SwitchEdges[TargetBlock]; + updateReachableEdge(B, TargetBlock); + } + } + } else { + // Otherwise this is either unconditional, or a type we have no + // idea about. Just mark successors as reachable. + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + BasicBlock *TargetBlock = TI->getSuccessor(i); + updateReachableEdge(B, TargetBlock); + } + } +} + +// The algorithm initially places the values of the routine in the INITIAL congruence +// class. The leader of INITIAL is the undetermined value `TOP`. +// When the algorithm has finished, values still in INITIAL are unreachable. +void NewGVN::initializeCongruenceClasses(Function &F) { + // FIXME now i can't remember why this is 2 + NextCongruenceNum = 2; + // Initialize all other instructions to be in INITIAL class. + CongruenceClass::MemberSet InitialValues; + for (auto &B : F) + for (auto &I : B) + InitialValues.insert(&I); + + InitialClass = createCongruenceClass(NULL, NULL); + for (auto L : InitialValues) + ValueToClass[L] = InitialClass; + InitialClass->Members.swap(InitialValues); + + // Initialize arguments to be in their own unique congruence classes + for (auto &FA : F.args()) + createSingletonCongruenceClass(&FA); +} + +void NewGVN::cleanupTables() { + for (unsigned i = 0, e = CongruenceClasses.size(); i != e; ++i) { + DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->ID << " has " + << CongruenceClasses[i]->Members.size() << " members\n"); + // Make sure we delete the congruence class (probably worth switching to + // a unique_ptr at some point. + delete CongruenceClasses[i]; + CongruenceClasses[i] = NULL; + } + + ValueToClass.clear(); + ArgRecycler.clear(ExpressionAllocator); + ExpressionAllocator.Reset(); + CongruenceClasses.clear(); + ExpressionToClass.clear(); + ValueToExpression.clear(); + ReachableBlocks.clear(); + ReachableEdges.clear(); +#ifndef NDEBUG + ProcessedCount.clear(); +#endif + DFSDomMap.clear(); + InstrDFS.clear(); + InstructionsToErase.clear(); + + DFSToInstr.clear(); + BlockInstRange.clear(); + TouchedInstructions.clear(); + DominatedInstRange.clear(); +} + +std::pair NewGVN::assignDFSNumbers(BasicBlock *B, + unsigned Start) { + unsigned End = Start; + for (auto &I : *B) { + InstrDFS[&I] = End++; + DFSToInstr.emplace_back(&I); + } + + // All of the range functions taken half-open ranges (open on the end side). + // So we do not subtract one from count, because at this point it is one + // greater than the last instruction. + return std::make_pair(Start, End); +} + +void NewGVN::updateProcessedCount(Value *V) { +#ifndef NDEBUG + if (ProcessedCount.count(V) == 0) { + ProcessedCount.insert({V, 1}); + } else { + ProcessedCount[V] += 1; + assert(ProcessedCount[V] < 100 && + "Seem to have processed the same Value a lot\n"); + } +#endif +} + +// This is the main transformation entry point. 
+bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, + TargetLibraryInfo *_TLI, AliasAnalysis *_AA, + MemorySSA *_MSSA) { + bool Changed = false; + DT = _DT; + AC = _AC; + TLI = _TLI; + AA = _AA; + MSSA = _MSSA; + DL = &F.getParent()->getDataLayout(); + MSSAWalker = MSSA->getWalker(); + + // Count number of instructions for sizing of hash tables, and come + // up with a global dfs numbering for instructions. + unsigned ICount = 0; + SmallPtrSet VisitedBlocks; + + // Note: We want RPO traversal of the blocks, which is not quite the same as + // dominator tree order, particularly with regard whether backedges get + // visited first or second, given a block with multiple successors. + // If we visit in the wrong order, we will end up performing N times as many + // iterations. + ReversePostOrderTraversal RPOT(&F); + for (auto &B : RPOT) { + VisitedBlocks.insert(B); + const auto &BlockRange = assignDFSNumbers(B, ICount); + BlockInstRange.insert({B, BlockRange}); + ICount += BlockRange.second - BlockRange.first; + } + + // Handle forward unreachable blocks and figure out which blocks + // have single preds. + for (auto &B : F) { + // Assign numbers to unreachable blocks. + if (!VisitedBlocks.count(&B)) { + const auto &BlockRange = assignDFSNumbers(&B, ICount); + BlockInstRange.insert({&B, BlockRange}); + ICount += BlockRange.second - BlockRange.first; + } + } + + TouchedInstructions.resize(ICount + 1); + DominatedInstRange.reserve(F.size()); + // Ensure we don't end up resizing the expressionToClass map, as + // that can be quite expensive. At most, we have one expression per + // instruction. + ExpressionToClass.reserve(ICount + 1); + + // Initialize the touched instructions to include the entry block. + const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock()); + TouchedInstructions.set(InstRange.first, InstRange.second); + ReachableBlocks.insert(&F.getEntryBlock()); + + initializeCongruenceClasses(F); + + // We start out in the entry block. + BasicBlock *LastBlock = &F.getEntryBlock(); + while (TouchedInstructions.any()) { + // Walk through all the instructions in all the blocks in RPO. + for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1; + InstrNum = TouchedInstructions.find_next(InstrNum)) { + Instruction *I = DFSToInstr[InstrNum]; + BasicBlock *CurrBlock = I->getParent(); + + // If we hit a new block, do reachability processing. + if (CurrBlock != LastBlock) { + LastBlock = CurrBlock; + bool BlockReachable = ReachableBlocks.count(CurrBlock); + const auto &CurrInstRange = BlockInstRange.lookup(CurrBlock); + + // If it's not reachable, erase any touched instructions and move on. 
+ if (!BlockReachable) { + TouchedInstructions.reset(CurrInstRange.first, CurrInstRange.second); + DEBUG(dbgs() << "Skipping instructions in block " + << getBlockName(CurrBlock) + << " because it is unreachable\n"); + continue; + } + updateProcessedCount(CurrBlock); + } + DEBUG(dbgs() << "Processing instruction " << *I << "\n"); + if (I->use_empty() && !I->getType()->isVoidTy()) { + DEBUG(dbgs() << "Skipping unused instruction\n"); + if (isInstructionTriviallyDead(I, TLI)) + markInstructionForDeletion(I); + TouchedInstructions.reset(InstrNum); + continue; + } + updateProcessedCount(I); + + if (!I->isTerminator()) { + const Expression *Symbolized = performSymbolicEvaluation(I, CurrBlock); + performCongruenceFinding(I, Symbolized); + } else { + processOutgoingEdges(dyn_cast(I), CurrBlock); + } + // Reset after processing (because we may mark ourselves as touched when + // we propagate equalities). + TouchedInstructions.reset(InstrNum); + } + } + + Changed |= eliminateInstructions(F); + + // Delete all instructions marked for deletion. + for (Instruction *ToErase : InstructionsToErase) { + if (!ToErase->use_empty()) + ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType())); + + ToErase->eraseFromParent(); + } + + // Delete all unreachable blocks. + for (auto &B : F) { + BasicBlock *BB = &B; + if (!ReachableBlocks.count(BB)) { + DEBUG(dbgs() << "We believe block " << getBlockName(BB) + << " is unreachable\n"); + deleteInstructionsInBlock(BB); + Changed = true; + } + } + + cleanupTables(); + return Changed; +} + +bool NewGVN::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + return runGVN(F, &getAnalysis().getDomTree(), + &getAnalysis().getAssumptionCache(F), + &getAnalysis().getTLI(), + &getAnalysis().getAAResults(), + &getAnalysis().getMSSA()); +} + +PreservedAnalyses NewGVNPass::run(Function &F, + AnalysisManager &AM) { + NewGVN Impl; + + // Apparently the order in which we get these results matter for + // the old GVN (see Chandler's comment in GVN.cpp). I'll keep + // the same order here, just in case. + auto &AC = AM.getResult(F); + auto &DT = AM.getResult(F); + auto &TLI = AM.getResult(F); + auto &AA = AM.getResult(F); + auto &MSSA = AM.getResult(F).getMSSA(); + bool Changed = Impl.runGVN(F, &DT, &AC, &TLI, &AA, &MSSA); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + PA.preserve(); + return PA; +} + +// Return true if V is a value that will always be available (IE can +// be placed anywhere) in the function. We don't do globals here +// because they are often worse to put in place. +// TODO: Separate cost from availability +static bool alwaysAvailable(Value *V) { + return isa(V) || isa(V); +} + +// Get the basic block from an instruction/value. +static BasicBlock *getBlockForValue(Value *V) { + if (auto *I = dyn_cast(V)) + return I->getParent(); + return nullptr; +} + +struct NewGVN::ValueDFS { + int DFSIn; + int DFSOut; + int LocalNum; + // Only one of these will be set. + Value *Val; + Use *U; + ValueDFS() + : DFSIn(0), DFSOut(0), LocalNum(0), Val(nullptr), U(nullptr) {} + + bool operator<(const ValueDFS &Other) const { + // It's not enough that any given field be less than - we have sets + // of fields that need to be evaluated together to give a proper ordering. 
+    // For example, if you have:
+    //   DFS (1, 3)
+    //   Val 0
+    //   DFS (1, 2)
+    //   Val 50
+    // We want the second to be less than the first, but if we just go field
+    // by field, we will get to Val 0 < Val 50 and say the first is less than
+    // the second. We only want it to be less than if the DFS orders are equal.
+    //
+    // Each LLVM instruction only produces one value, and thus the lowest-level
+    // differentiator that really matters for the stack (and what we use as a
+    // replacement) is the local dfs number.
+    // Everything else in the structure is instruction level, and only affects
+    // the order in which we will replace operands of a given instruction.
+    //
+    // For a given instruction (IE things with equal dfsin, dfsout, localnum),
+    // the order of replacement of uses does not matter.
+    // IE given,
+    //   a = 5
+    //   b = a + a
+    // When you hit b, you will have two ValueDFS entries with the same dfsin,
+    // dfsout, and localnum. The .val will be the same as well.
+    // The .u's will be different.
+    // You will replace both, and it does not matter what order you replace
+    // them in (IE whether you replace operand 2, then operand 1, or operand 1,
+    // then operand 2).
+    // Similarly for the case of same dfsin, dfsout, and localnum, but
+    // different .val's:
+    //   a = 5
+    //   b = 6
+    //   c = a + b
+    // In c, we will have a ValueDFS for a, and one for b, with everything the
+    // same but .val and .u.
+    // It does not matter what order we replace these operands in; you will
+    // always end up with the same IR.
+    return std::tie(DFSIn, DFSOut, LocalNum, Val, U) <
+           std::tie(Other.DFSIn, Other.DFSOut, Other.LocalNum, Other.Val,
+                    Other.U);
+  }
+};
+
+void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense,
+                                      std::vector<ValueDFS> &DFSOrderedSet) {
+  for (auto D : Dense) {
+    // First add the value.
+    BasicBlock *BB = getBlockForValue(D);
+    // Constants are handled prior to ever calling this function, so
+    // we should only be left with instructions as members.
+    assert(BB && "Should have figured out a basic block for value");
+    ValueDFS VD;
+
+    std::pair<int, int> DFSPair = DFSDomMap[BB];
+    assert(DFSPair.first != -1 && DFSPair.second != -1 && "Invalid DFS Pair");
+    VD.DFSIn = DFSPair.first;
+    VD.DFSOut = DFSPair.second;
+    VD.Val = D;
+    // If it's an instruction, use the real local dfs number.
+    if (auto *I = dyn_cast<Instruction>(D))
+      VD.LocalNum = InstrDFS[I];
+    else
+      llvm_unreachable("Should have been an instruction");
+
+    DFSOrderedSet.emplace_back(VD);
+
+    // Now add the users.
+    for (auto &U : D->uses()) {
+      if (auto *I = dyn_cast<Instruction>(U.getUser())) {
+        ValueDFS VD;
+        // Put the phi node uses in the incoming block.
+        BasicBlock *IBlock;
+        if (auto *P = dyn_cast<PHINode>(I)) {
+          IBlock = P->getIncomingBlock(U);
+          // Make phi node users appear last in the incoming block
+          // they are from.
+          VD.LocalNum = InstrDFS.size() + 1;
+        } else {
+          IBlock = I->getParent();
+          VD.LocalNum = InstrDFS[I];
+        }
+        std::pair<int, int> DFSPair = DFSDomMap[IBlock];
+        VD.DFSIn = DFSPair.first;
+        VD.DFSOut = DFSPair.second;
+        VD.U = &U;
+        DFSOrderedSet.emplace_back(VD);
+      }
+    }
+  }
+}
+
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
+  // Patch the replacement so that it is not more restrictive than the value
+  // being replaced.
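+  // For example (hypothetical IR): if  %b = add nsw i32 %x, %y  replaces
+  // %a = add i32 %x, %y,  the andIRFlags call below drops nsw from %b, since
+  // %a's uses do not guarantee no-signed-wrap.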
+  auto *Op = dyn_cast<BinaryOperator>(I);
+  auto *ReplOp = dyn_cast<BinaryOperator>(Repl);
+
+  if (Op && ReplOp)
+    ReplOp->andIRFlags(Op);
+
+  if (auto *ReplInst = dyn_cast<Instruction>(Repl)) {
+    // FIXME: If both the original and replacement value are part of the
+    // same control-flow region (meaning that the execution of one
+    // guarantees the execution of the other), then we can combine the
+    // noalias scopes here and do better than the general conservative
+    // answer used in combineMetadata().
+
+    // In general, GVN unifies expressions over different control-flow
+    // regions, and so we need a conservative combination of the noalias
+    // scopes.
+    unsigned KnownIDs[] = {
+        LLVMContext::MD_tbaa,    LLVMContext::MD_alias_scope,
+        LLVMContext::MD_noalias, LLVMContext::MD_range,
+        LLVMContext::MD_fpmath,  LLVMContext::MD_invariant_load,
+        LLVMContext::MD_invariant_group};
+    combineMetadata(ReplInst, I, KnownIDs);
+  }
+}
+
+static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
+  patchReplacementInstruction(I, Repl);
+  I->replaceAllUsesWith(Repl);
+}
+
+void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) {
+  DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
+  ++NumGVNBlocksDeleted;
+
+  // Check to see if there are non-terminating instructions to delete.
+  if (isa<TerminatorInst>(BB->begin()))
+    return;
+
+  // Delete the instructions backwards, as doing so has a reduced likelihood
+  // of having to update as many def-use and use-def chains. Start after the
+  // terminator.
+  auto StartPoint = BB->rbegin();
+  ++StartPoint;
+  // Note that we explicitly recalculate BB->rend() on each iteration,
+  // as it may change when we remove the first instruction.
+  for (BasicBlock::reverse_iterator I(StartPoint); I != BB->rend();) {
+    Instruction &Inst = *I++;
+    if (!Inst.use_empty())
+      Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
+    if (isa<LandingPadInst>(Inst))
+      continue;
+
+    Inst.eraseFromParent();
+    ++NumGVNInstrDeleted;
+  }
+}
+
+void NewGVN::markInstructionForDeletion(Instruction *I) {
+  DEBUG(dbgs() << "Marking " << *I << " for deletion\n");
+  InstructionsToErase.insert(I);
+}
+
+void NewGVN::replaceInstruction(Instruction *I, Value *V) {
+
+  DEBUG(dbgs() << "Replacing " << *I << " with " << *V << "\n");
+  patchAndReplaceAllUsesWith(I, V);
+  // We save the actual erasing to avoid invalidating memory
+  // dependencies until we are done with everything.
+  markInstructionForDeletion(I);
+}
+
+namespace {
+
+// This is a stack that contains both the value and dfs info of where
+// that value is valid.
+class ValueDFSStack {
+public:
+  Value *back() const { return ValueStack.back(); }
+  std::pair<int, int> dfs_back() const { return DFSStack.back(); }
+
+  void push_back(Value *V, int DFSIn, int DFSOut) {
+    ValueStack.emplace_back(V);
+    DFSStack.emplace_back(DFSIn, DFSOut);
+  }
+  bool empty() const { return DFSStack.empty(); }
+  bool isInScope(int DFSIn, int DFSOut) const {
+    if (empty())
+      return false;
+    return DFSIn >= DFSStack.back().first && DFSOut <= DFSStack.back().second;
+  }
+
+  void popUntilDFSScope(int DFSIn, int DFSOut) {
+
+    // These two should always be in sync at this point.
+    assert(ValueStack.size() == DFSStack.size() &&
+           "Mismatch between ValueStack and DFSStack");
+    while (
+        !DFSStack.empty() &&
+        !(DFSIn >= DFSStack.back().first && DFSOut <= DFSStack.back().second)) {
+      DFSStack.pop_back();
+      ValueStack.pop_back();
+    }
+  }
+
+private:
+  SmallVector<Value *, 8> ValueStack;
+  SmallVector<std::pair<int, int>, 8> DFSStack;
+};
+}
+
+bool NewGVN::eliminateInstructions(Function &F) {
+  // This is a non-standard eliminator.
The normal way to eliminate is + // to walk the dominator tree in order, keeping track of available + // values, and eliminating them. However, this is mildly + // pointless. It requires doing lookups on every instruction, + // regardless of whether we will ever eliminate it. For + // instructions part of most singleton congruence class, we know we + // will never eliminate it. + + // Instead, this eliminator looks at the congruence classes directly, sorts + // them into a DFS ordering of the dominator tree, and then we just + // perform eliminate straight on the sets by walking the congruence + // class member uses in order, and eliminate the ones dominated by the + // last member. This is technically O(N log N) where N = number of + // instructions (since in theory all instructions may be in the same + // congruence class). + // When we find something not dominated, it becomes the new leader + // for elimination purposes + + bool AnythingReplaced = false; + + // Since we are going to walk the domtree anyway, and we can't guarantee the + // DFS numbers are updated, we compute some ourselves. + DT->updateDFSNumbers(); + + for (auto &B : F) { + if (!ReachableBlocks.count(&B)) { + for (const auto S : successors(&B)) { + for (auto II = S->begin(); isa(II); ++II) { + PHINode &Phi = cast(*II); + DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block " + << getBlockName(&B) + << " with undef due to it being unreachable\n"); + for (auto &Operand : Phi.incoming_values()) + if (Phi.getIncomingBlock(Operand) == &B) + Operand.set(UndefValue::get(Phi.getType())); + } + } + } + DomTreeNode *Node = DT->getNode(&B); + if (Node) + DFSDomMap[&B] = {Node->getDFSNumIn(), Node->getDFSNumOut()}; + } + + for (CongruenceClass *CC : CongruenceClasses) { + // FIXME: We should eventually be able to replace everything still + // in the initial class with undef, as they should be unreachable. + // Right now, initial still contains some things we skip value + // numbering of (UNREACHABLE's, for example). + if (CC == InitialClass || CC->Dead) + continue; + assert(CC->RepLeader && "We should have had a leader"); + + // If this is a leader that is always available, and it's a + // constant or has no equivalences, just replace everything with + // it. We then update the congruence class with whatever members + // are left. + if (alwaysAvailable(CC->RepLeader)) { + SmallPtrSet MembersLeft; + for (auto M : CC->Members) { + + Value *Member = M; + + // Void things have no uses we can replace. + if (Member == CC->RepLeader || Member->getType()->isVoidTy()) { + MembersLeft.insert(Member); + continue; + } + + DEBUG(dbgs() << "Found replacement " << *(CC->RepLeader) << " for " + << *Member << "\n"); + // Due to equality propagation, these may not always be + // instructions, they may be real values. We don't really + // care about trying to replace the non-instructions. + if (auto *I = dyn_cast(Member)) { + assert(CC->RepLeader != I && + "About to accidentally remove our leader"); + replaceInstruction(I, CC->RepLeader); + AnythingReplaced = true; + + continue; + } else { + MembersLeft.insert(I); + } + } + CC->Members.swap(MembersLeft); + + } else { + DEBUG(dbgs() << "Eliminating in congruence class " << CC->ID << "\n"); + // If this is a singleton, we can skip it. 
+ if (CC->Members.size() != 1) { + + // This is a stack because equality replacement/etc may place + // constants in the middle of the member list, and we want to use + // those constant values in preference to the current leader, over + // the scope of those constants. + ValueDFSStack EliminationStack; + + // Convert the members to DFS ordered sets and then merge them. + std::vector DFSOrderedSet; + convertDenseToDFSOrdered(CC->Members, DFSOrderedSet); + + // Sort the whole thing. + sort(DFSOrderedSet.begin(), DFSOrderedSet.end()); + + for (auto &C : DFSOrderedSet) { + int MemberDFSIn = C.DFSIn; + int MemberDFSOut = C.DFSOut; + Value *Member = C.Val; + Use *MemberUse = C.U; + + // We ignore void things because we can't get a value from them. + if (Member && Member->getType()->isVoidTy()) + continue; + + if (EliminationStack.empty()) { + DEBUG(dbgs() << "Elimination Stack is empty\n"); + } else { + DEBUG(dbgs() << "Elimination Stack Top DFS numbers are (" + << EliminationStack.dfs_back().first << "," + << EliminationStack.dfs_back().second << ")\n"); + } + if (Member && isa(Member)) + assert(isa(CC->RepLeader)); + + DEBUG(dbgs() << "Current DFS numbers are (" << MemberDFSIn << "," + << MemberDFSOut << ")\n"); + // First, we see if we are out of scope or empty. If so, + // and there equivalences, we try to replace the top of + // stack with equivalences (if it's on the stack, it must + // not have been eliminated yet). + // Then we synchronize to our current scope, by + // popping until we are back within a DFS scope that + // dominates the current member. + // Then, what happens depends on a few factors + // If the stack is now empty, we need to push + // If we have a constant or a local equivalence we want to + // start using, we also push. + // Otherwise, we walk along, processing members who are + // dominated by this scope, and eliminate them. + bool ShouldPush = + Member && (EliminationStack.empty() || isa(Member)); + bool OutOfScope = + !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut); + + if (OutOfScope || ShouldPush) { + // Sync to our current scope. + EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut); + ShouldPush |= Member && EliminationStack.empty(); + if (ShouldPush) { + EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut); + } + } + + // If we get to this point, and the stack is empty we must have a use + // with nothing we can use to eliminate it, just skip it. + if (EliminationStack.empty()) + continue; + + // Skip the Value's, we only want to eliminate on their uses. + if (Member) + continue; + Value *Result = EliminationStack.back(); + + // Don't replace our existing users with ourselves. + if (MemberUse->get() == Result) + continue; + + DEBUG(dbgs() << "Found replacement " << *Result << " for " + << *MemberUse->get() << " in " << *(MemberUse->getUser()) + << "\n"); + + // If we replaced something in an instruction, handle the patching of + // metadata. + if (auto *ReplacedInst = + dyn_cast(MemberUse->get())) + patchReplacementInstruction(ReplacedInst, Result); + + assert(isa(MemberUse->getUser())); + MemberUse->set(Result); + AnythingReplaced = true; + } + } + } + + // Cleanup the congruence class. 
+ SmallPtrSet MembersLeft; + for (auto MI = CC->Members.begin(), ME = CC->Members.end(); MI != ME;) { + auto CurrIter = MI; + ++MI; + Value *Member = *CurrIter; + if (Member->getType()->isVoidTy()) { + MembersLeft.insert(Member); + continue; + } + + if (auto *MemberInst = dyn_cast(Member)) { + if (isInstructionTriviallyDead(MemberInst)) { + // TODO: Don't mark loads of undefs. + markInstructionForDeletion(MemberInst); + continue; + } + } + MembersLeft.insert(Member); + } + CC->Members.swap(MembersLeft); + } + + return AnythingReplaced; +} + diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index e6dba08..afe7483 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -43,6 +43,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeDSELegacyPassPass(Registry); initializeGuardWideningLegacyPassPass(Registry); initializeGVNLegacyPassPass(Registry); + initializeNewGVNPass(Registry); initializeEarlyCSELegacyPassPass(Registry); initializeEarlyCSEMemSSALegacyPassPass(Registry); initializeGVNHoistLegacyPassPass(Registry); @@ -126,6 +127,10 @@ void LLVMAddGVNPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createGVNPass()); } +void LLVMAddNewGVNPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createNewGVNPass()); +} + void LLVMAddMergedLoadStoreMotionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createMergedLoadStoreMotionPass()); } diff --git a/llvm/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll b/llvm/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll new file mode 100644 index 0000000..76ff899 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll @@ -0,0 +1,86 @@ +; RUN: opt < %s -newgvn | llvm-dis + + %struct.PerlInterpreter = type { i8 } +@PL_sv_count = external global i32 ; [#uses=2] + +define void @perl_destruct(%struct.PerlInterpreter* %sv_interp) { +entry: + br i1 false, label %cond_next25, label %cond_true16 + +cond_true16: ; preds = %entry + ret void + +cond_next25: ; preds = %entry + br i1 false, label %cond_next33, label %cond_true32 + +cond_true32: ; preds = %cond_next25 + ret void + +cond_next33: ; preds = %cond_next25 + br i1 false, label %cond_next61, label %cond_true.i46 + +cond_true.i46: ; preds = %cond_next33 + ret void + +cond_next61: ; preds = %cond_next33 + br i1 false, label %cond_next69, label %cond_true66 + +cond_true66: ; preds = %cond_next61 + ret void + +cond_next69: ; preds = %cond_next61 + br i1 false, label %Perl_safefree.exit52, label %cond_true.i50 + +cond_true.i50: ; preds = %cond_next69 + ret void + +Perl_safefree.exit52: ; preds = %cond_next69 + br i1 false, label %cond_next80, label %cond_true77 + +cond_true77: ; preds = %Perl_safefree.exit52 + ret void + +cond_next80: ; preds = %Perl_safefree.exit52 + br i1 false, label %Perl_safefree.exit56, label %cond_true.i54 + +cond_true.i54: ; preds = %cond_next80 + ret void + +Perl_safefree.exit56: ; preds = %cond_next80 + br i1 false, label %Perl_safefree.exit60, label %cond_true.i58 + +cond_true.i58: ; preds = %Perl_safefree.exit56 + ret void + +Perl_safefree.exit60: ; preds = %Perl_safefree.exit56 + br i1 false, label %Perl_safefree.exit64, label %cond_true.i62 + +cond_true.i62: ; preds = %Perl_safefree.exit60 + ret void + +Perl_safefree.exit64: ; preds = %Perl_safefree.exit60 + br i1 false, label %Perl_safefree.exit68, label %cond_true.i66 + +cond_true.i66: ; preds = %Perl_safefree.exit64 + ret void + +Perl_safefree.exit68: ; preds = %Perl_safefree.exit64 + br i1 false, label %cond_next150, 
label %cond_true23.i + +cond_true23.i: ; preds = %Perl_safefree.exit68 + ret void + +cond_next150: ; preds = %Perl_safefree.exit68 + %tmp16092 = load i32, i32* @PL_sv_count, align 4 ; [#uses=0] + br label %cond_next165 + +bb157: ; preds = %cond_next165 + %tmp158 = load i32, i32* @PL_sv_count, align 4 ; [#uses=0] + br label %cond_next165 + +cond_next165: ; preds = %bb157, %cond_next150 + br i1 false, label %bb171, label %bb157 + +bb171: ; preds = %cond_next165 + ret void +} diff --git a/llvm/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll b/llvm/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll new file mode 100644 index 0000000..fcbfb4c --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + + %struct.INT2 = type { i32, i32 } +@blkshifts = external global %struct.INT2* ; <%struct.INT2**> [#uses=2] + +define i32 @xcompact() { +entry: + store %struct.INT2* null, %struct.INT2** @blkshifts, align 4 + br label %bb + +bb: ; preds = %bb, %entry + %tmp10 = load %struct.INT2*, %struct.INT2** @blkshifts, align 4 ; <%struct.INT2*> [#uses=0] +; CHECK-NOT: %tmp10 + br label %bb +} diff --git a/llvm/test/Transforms/NewGVN/2007-07-25-Loop.ll b/llvm/test/Transforms/NewGVN/2007-07-25-Loop.ll new file mode 100644 index 0000000..aea5b72 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2007-07-25-Loop.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -newgvn | llvm-dis + + %struct.s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float } + +define void @print_arch(i8* %arch_file, i32 %route_type, i64 %det_routing_arch.0.0, i64 %det_routing_arch.0.1, i64 %det_routing_arch.0.2, i64 %det_routing_arch.0.3, i64 %det_routing_arch.0.4, %struct.s_segment_inf* %segment_inf, i64 %timing_inf.0.0, i64 %timing_inf.0.1, i64 %timing_inf.0.2, i64 %timing_inf.0.3, i64 %timing_inf.0.4, i32 %timing_inf.1) { +entry: + br i1 false, label %bb278, label %bb344 + +bb278: ; preds = %bb278, %entry + br i1 false, label %bb278, label %bb344 + +bb344: ; preds = %bb278, %entry + %tmp38758 = load i16, i16* null, align 2 ; [#uses=0] + ret void +} diff --git a/llvm/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll b/llvm/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll new file mode 100644 index 0000000..6346824 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -newgvn | llvm-dis + + %struct.TypHeader = type { i32, %struct.TypHeader**, [3 x i8], i8 } + +define %struct.TypHeader* @LtRec(%struct.TypHeader* %hdL, %struct.TypHeader* %hdR) { +entry: + br i1 false, label %bb556.preheader, label %bb534.preheader + +bb534.preheader: ; preds = %entry + ret %struct.TypHeader* null + +bb556.preheader: ; preds = %entry + %tmp56119 = getelementptr %struct.TypHeader, %struct.TypHeader* %hdR, i32 0, i32 0 ; [#uses=1] + %tmp56220 = load i32, i32* %tmp56119 ; [#uses=0] + br i1 false, label %bb.nph23, label %bb675.preheader + +bb.nph23: ; preds = %bb556.preheader + ret %struct.TypHeader* null + +bb656: ; preds = %bb675.outer, %bb656 + %tmp678 = load i32, i32* %tmp677 ; [#uses=0] + br i1 false, label %bb684, label %bb656 + +bb684: ; preds = %bb675.outer, %bb656 + br i1 false, label %bb924.preheader, label %bb675.outer + +bb675.outer: ; preds = %bb675.preheader, %bb684 + %tmp67812 = load i32, i32* %tmp67711 ; [#uses=0] + br i1 false, label %bb684, label %bb656 + +bb675.preheader: ; preds = %bb556.preheader + %tmp67711 = getelementptr %struct.TypHeader, %struct.TypHeader* %hdR, i32 0, i32 0 ; [#uses=1] + %tmp677 
= getelementptr %struct.TypHeader, %struct.TypHeader* %hdR, i32 0, i32 0 ; [#uses=1] + br label %bb675.outer + +bb924.preheader: ; preds = %bb684 + ret %struct.TypHeader* null +} diff --git a/llvm/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll b/llvm/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll new file mode 100644 index 0000000..dfbdac0 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -newgvn | llvm-dis + + %struct.ggBRDF = type { i32 (...)** } + %struct.ggBox3 = type { %struct.ggPoint3, %struct.ggPoint3 } + %struct.ggMaterialRecord = type { %struct.ggPoint2, %struct.ggBox3, %struct.ggBox3, %struct.ggSpectrum, %struct.ggSpectrum, %struct.ggSpectrum, %struct.ggBRDF*, i32, i32, i32, i32 } + %struct.ggONB3 = type { %struct.ggPoint3, %struct.ggPoint3, %struct.ggPoint3 } + %struct.ggPoint2 = type { [2 x double] } + %struct.ggPoint3 = type { [3 x double] } + %struct.ggSpectrum = type { [8 x float] } + %struct.mrViewingHitRecord = type { double, %struct.ggPoint3, %struct.ggONB3, %struct.ggPoint2, double, %struct.ggSpectrum, %struct.ggSpectrum, i32, i32, i32, i32 } + %struct.mrXEllipticalCylinder = type { %struct.ggBRDF, float, float, float, float, float, float } + +define i32 @_ZNK21mrZEllipticalCylinder10viewingHitERK6ggRay3dddR18mrViewingHitRecordR16ggMaterialRecord(%struct.mrXEllipticalCylinder* %this, %struct.ggBox3* %ray, double %unnamed_arg, double %tmin, double %tmax, %struct.mrViewingHitRecord* %VHR, %struct.ggMaterialRecord* %unnamed_arg2) { +entry: + %tmp80.i = getelementptr %struct.mrViewingHitRecord, %struct.mrViewingHitRecord* %VHR, i32 0, i32 1, i32 0, i32 0 ; [#uses=1] + store double 0.000000e+00, double* %tmp80.i + br i1 false, label %return, label %cond_next.i + +cond_next.i: ; preds = %entry + br i1 false, label %return, label %cond_true + +cond_true: ; preds = %cond_next.i + %tmp3.i8 = getelementptr %struct.mrViewingHitRecord, %struct.mrViewingHitRecord* %VHR, i32 0, i32 1, i32 0, i32 0 ; [#uses=1] + %tmp46 = load double, double* %tmp3.i8 ; [#uses=0] + ret i32 1 + +return: ; preds = %cond_next.i, %entry + ret i32 0 +} diff --git a/llvm/test/Transforms/NewGVN/2007-07-26-InterlockingLoops.ll b/llvm/test/Transforms/NewGVN/2007-07-26-InterlockingLoops.ll new file mode 100644 index 0000000..1eb90ad --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2007-07-26-InterlockingLoops.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +@last = external global [65 x i32*] + +define i32 @NextRootMove(i32 %wtm, i32 %x, i32 %y, i32 %z) { +entry: + %A = alloca i32* + %tmp17618 = load i32*, i32** getelementptr ([65 x i32*], [65 x i32*]* @last, i32 0, i32 1), align 4 + store i32* %tmp17618, i32** %A +; CHECK: entry: +; CHECK-NEXT: alloca i32 +; CHECK-NEXT: %tmp17618 = load +; CHECK-NOT: load +; CHECK-NOT: phi + br label %cond_true116 + +cond_true116: + %cmp = icmp eq i32 %x, %y + br i1 %cmp, label %cond_true128, label %cond_true145 + +cond_true128: + %tmp17625 = load i32*, i32** getelementptr ([65 x i32*], [65 x i32*]* @last, i32 0, i32 1), align 4 + store i32* %tmp17625, i32** %A + %cmp1 = icmp eq i32 %x, %z + br i1 %cmp1 , label %bb98.backedge, label %return.loopexit + +bb98.backedge: + br label %cond_true116 + +cond_true145: + %tmp17631 = load i32*, i32** getelementptr ([65 x i32*], [65 x i32*]* @last, i32 0, i32 1), align 4 + store i32* %tmp17631, i32** %A + br i1 false, label %bb98.backedge, label %return.loopexit + +return.loopexit: + br label %return + +return: + ret i32 0 +} diff --git 
a/llvm/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll b/llvm/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll new file mode 100644 index 0000000..344af2c --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -newgvn | llvm-dis + +@bsLive = external global i32 ; [#uses=2] + +define i32 @bsR(i32 %n) { +entry: + br i1 false, label %cond_next, label %bb19 + +cond_next: ; preds = %entry + store i32 0, i32* @bsLive, align 4 + br label %bb19 + +bb19: ; preds = %cond_next, %entry + %tmp29 = load i32, i32* @bsLive, align 4 ; [#uses=0] + ret i32 0 +} diff --git a/llvm/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll b/llvm/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll new file mode 100644 index 0000000..402de50 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll @@ -0,0 +1,45 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s + + %struct..0anon = type { i32 } + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] } +@n_spills = external global i32 ; [#uses=2] + +define i32 @reload(%struct.rtx_def* %first, i32 %global, %struct.FILE* %dumpfile) { +cond_next2835.1: ; preds = %cond_next2861 + %tmp2922 = load i32, i32* @n_spills, align 4 ; [#uses=0] + br label %bb2928 + +bb2928: ; preds = %cond_next2835.1, %cond_next2943 + br i1 false, label %cond_next2943, label %cond_true2935 + +cond_true2935: ; preds = %bb2928 + br label %cond_next2943 + +cond_next2943: ; preds = %cond_true2935, %bb2928 + br i1 false, label %bb2982.preheader, label %bb2928 + +bb2982.preheader: ; preds = %cond_next2943 + %tmp298316 = load i32, i32* @n_spills, align 4 ; [#uses=0] + ret i32 %tmp298316 + +} + +; CHECK: define i32 @reload(%struct.rtx_def* %first, i32 %global, %struct.FILE* %dumpfile) { +; CHECK-NEXT: cond_next2835.1: +; CHECK-NEXT: br label %bb2928 +; CHECK: bb2928: +; CHECK-NEXT: br i1 false, label %bb2928.cond_next2943_crit_edge, label %cond_true2935 +; CHECK: bb2928.cond_next2943_crit_edge: +; CHECK-NEXT: br label %cond_next2943 +; CHECK: cond_true2935: +; CHECK-NEXT: br label %cond_next2943 +; CHECK: cond_next2943: +; CHECK-NEXT: br i1 false, label %bb2982.preheader, label %bb2928 +; CHECK: bb2982.preheader: +; CHECK-NEXT: %tmp298316 = load i32, i32* @n_spills, align 4 +; CHECK-NEXT: ret i32 %tmp298316 +; CHECK-NEXT: } diff --git a/llvm/test/Transforms/NewGVN/2007-07-30-PredIDom.ll b/llvm/test/Transforms/NewGVN/2007-07-30-PredIDom.ll new file mode 100644 index 0000000..ddb1e9a --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2007-07-30-PredIDom.ll @@ -0,0 +1,274 @@ +; RUN: opt < %s -newgvn | llvm-dis + + %"struct.Block::$_16" = type { i32 } + %struct.Exp = type { %struct.Exp_*, i32, i32, i32, %struct.Exp*, %struct.Exp*, %"struct.Exp::$_10", %"struct.Block::$_16", %"struct.Exp::$_12" } + %"struct.Exp::$_10" = type { %struct.Exp* } + %"struct.Exp::$_12" = type { %struct.Exp** } + %struct.Exp_ = type { i32, i32, i32, i32, %struct.Id* } + %struct.Id = type { i8*, i32, i32, i32, %"struct.Id::$_13" } + %"struct.Id::$_13" = type { double } + +define i8* @_ZN3Exp8toStringEj(%struct.Exp* %this, i32 %nextpc) { +entry: + switch i32 0, label %bb970 [ + i32 1, label %bb + i32 2, label %bb39 + i32 3, label %bb195 + i32 4, label %bb270 + 
i32 5, label %bb418 + i32 6, label %bb633 + i32 7, label %bb810 + i32 8, label %bb882 + i32 9, label %bb925 + ] + +bb: ; preds = %entry + store i8* null, i8** null + br label %return + +bb39: ; preds = %entry + br i1 false, label %cond_true, label %cond_false132 + +cond_true: ; preds = %bb39 + br i1 false, label %cond_true73, label %cond_false + +cond_true73: ; preds = %cond_true + br i1 false, label %cond_true108, label %cond_next + +cond_true108: ; preds = %cond_true73 + br label %cond_next + +cond_next: ; preds = %cond_true108, %cond_true73 + br label %cond_next131 + +cond_false: ; preds = %cond_true + br label %cond_next131 + +cond_next131: ; preds = %cond_false, %cond_next + br label %cond_next141 + +cond_false132: ; preds = %bb39 + br label %cond_next141 + +cond_next141: ; preds = %cond_false132, %cond_next131 + br i1 false, label %cond_true169, label %cond_false175 + +cond_true169: ; preds = %cond_next141 + br label %cond_next181 + +cond_false175: ; preds = %cond_next141 + br label %cond_next181 + +cond_next181: ; preds = %cond_false175, %cond_true169 + br i1 false, label %cond_true189, label %cond_next191 + +cond_true189: ; preds = %cond_next181 + br label %cond_next191 + +cond_next191: ; preds = %cond_true189, %cond_next181 + store i8* null, i8** null + br label %return + +bb195: ; preds = %entry + br i1 false, label %cond_true248, label %cond_false250 + +cond_true248: ; preds = %bb195 + br label %cond_next252 + +cond_false250: ; preds = %bb195 + br label %cond_next252 + +cond_next252: ; preds = %cond_false250, %cond_true248 + br i1 false, label %cond_true265, label %cond_next267 + +cond_true265: ; preds = %cond_next252 + br label %cond_next267 + +cond_next267: ; preds = %cond_true265, %cond_next252 + store i8* null, i8** null + br label %return + +bb270: ; preds = %entry + br i1 false, label %cond_true338, label %cond_false340 + +cond_true338: ; preds = %bb270 + br label %cond_next342 + +cond_false340: ; preds = %bb270 + br label %cond_next342 + +cond_next342: ; preds = %cond_false340, %cond_true338 + br i1 false, label %cond_true362, label %cond_false364 + +cond_true362: ; preds = %cond_next342 + br label %cond_next366 + +cond_false364: ; preds = %cond_next342 + br label %cond_next366 + +cond_next366: ; preds = %cond_false364, %cond_true362 + br i1 false, label %cond_true393, label %cond_next395 + +cond_true393: ; preds = %cond_next366 + br label %cond_next395 + +cond_next395: ; preds = %cond_true393, %cond_next366 + br i1 false, label %cond_true406, label %cond_next408 + +cond_true406: ; preds = %cond_next395 + br label %cond_next408 + +cond_next408: ; preds = %cond_true406, %cond_next395 + br i1 false, label %cond_true413, label %cond_next415 + +cond_true413: ; preds = %cond_next408 + br label %cond_next415 + +cond_next415: ; preds = %cond_true413, %cond_next408 + store i8* null, i8** null + br label %return + +bb418: ; preds = %entry + br i1 false, label %cond_true512, label %cond_false514 + +cond_true512: ; preds = %bb418 + br label %cond_next516 + +cond_false514: ; preds = %bb418 + br label %cond_next516 + +cond_next516: ; preds = %cond_false514, %cond_true512 + br i1 false, label %cond_true536, label %cond_false538 + +cond_true536: ; preds = %cond_next516 + br label %cond_next540 + +cond_false538: ; preds = %cond_next516 + br label %cond_next540 + +cond_next540: ; preds = %cond_false538, %cond_true536 + br i1 false, label %cond_true560, label %cond_false562 + +cond_true560: ; preds = %cond_next540 + br label %cond_next564 + +cond_false562: ; preds = %cond_next540 + br 
label %cond_next564 + +cond_next564: ; preds = %cond_false562, %cond_true560 + br i1 false, label %cond_true597, label %cond_next599 + +cond_true597: ; preds = %cond_next564 + br label %cond_next599 + +cond_next599: ; preds = %cond_true597, %cond_next564 + br i1 false, label %cond_true614, label %cond_next616 + +cond_true614: ; preds = %cond_next599 + br label %cond_next616 + +cond_next616: ; preds = %cond_true614, %cond_next599 + br i1 false, label %cond_true621, label %cond_next623 + +cond_true621: ; preds = %cond_next616 + br label %cond_next623 + +cond_next623: ; preds = %cond_true621, %cond_next616 + br i1 false, label %cond_true628, label %cond_next630 + +cond_true628: ; preds = %cond_next623 + br label %cond_next630 + +cond_next630: ; preds = %cond_true628, %cond_next623 + store i8* null, i8** null + br label %return + +bb633: ; preds = %entry + br i1 false, label %cond_true667, label %cond_next669 + +cond_true667: ; preds = %bb633 + br label %cond_next669 + +cond_next669: ; preds = %cond_true667, %bb633 + br i1 false, label %cond_true678, label %cond_next791 + +cond_true678: ; preds = %cond_next669 + br label %bb735 + +bb679: ; preds = %bb735 + br i1 false, label %cond_true729, label %cond_next731 + +cond_true729: ; preds = %bb679 + br label %cond_next731 + +cond_next731: ; preds = %cond_true729, %bb679 + br label %bb735 + +bb735: ; preds = %cond_next731, %cond_true678 + br i1 false, label %bb679, label %bb743 + +bb743: ; preds = %bb735 + br i1 false, label %cond_true788, label %cond_next790 + +cond_true788: ; preds = %bb743 + br label %cond_next790 + +cond_next790: ; preds = %cond_true788, %bb743 + br label %cond_next791 + +cond_next791: ; preds = %cond_next790, %cond_next669 + br i1 false, label %cond_true805, label %cond_next807 + +cond_true805: ; preds = %cond_next791 + br label %cond_next807 + +cond_next807: ; preds = %cond_true805, %cond_next791 + store i8* null, i8** null + br label %return + +bb810: ; preds = %entry + br i1 false, label %cond_true870, label %cond_next872 + +cond_true870: ; preds = %bb810 + br label %cond_next872 + +cond_next872: ; preds = %cond_true870, %bb810 + br i1 false, label %cond_true877, label %cond_next879 + +cond_true877: ; preds = %cond_next872 + br label %cond_next879 + +cond_next879: ; preds = %cond_true877, %cond_next872 + store i8* null, i8** null + br label %return + +bb882: ; preds = %entry + br i1 false, label %cond_true920, label %cond_next922 + +cond_true920: ; preds = %bb882 + br label %cond_next922 + +cond_next922: ; preds = %cond_true920, %bb882 + store i8* null, i8** null + br label %return + +bb925: ; preds = %entry + br i1 false, label %cond_true965, label %cond_next967 + +cond_true965: ; preds = %bb925 + br label %cond_next967 + +cond_next967: ; preds = %cond_true965, %bb925 + store i8* null, i8** null + br label %return + +bb970: ; preds = %entry + unreachable + ; No predecessors! 
+ store i8* null, i8** null + br label %return + +return: ; preds = %0, %cond_next967, %cond_next922, %cond_next879, %cond_next807, %cond_next630, %cond_next415, %cond_next267, %cond_next191, %bb + %retval980 = load i8*, i8** null ; [#uses=1] + ret i8* %retval980 +} diff --git a/llvm/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll b/llvm/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll new file mode 100644 index 0000000..0fd7588 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll @@ -0,0 +1,315 @@ +; XFAIL: * +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + + %struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* } +@debug = external constant i32 ; [#uses=0] +@counters = external constant i32 ; [#uses=1] +@trialx = external global [17 x i32] ; <[17 x i32]*> [#uses=1] +@dummy1 = external global [7 x i32] ; <[7 x i32]*> [#uses=0] +@dummy2 = external global [4 x i32] ; <[4 x i32]*> [#uses=0] +@unacceptable = external global i32 ; [#uses=0] +@isa = external global [13 x %struct.anon] ; <[13 x %struct.anon]*> [#uses=3] +@.str = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str1 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str2 = external constant [1 x i8] ; <[1 x i8]*> [#uses=0] +@.str3 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str4 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str5 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str6 = external constant [2 x i8] ; <[2 x i8]*> [#uses=0] +@.str7 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str8 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str9 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str10 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str11 = external constant [2 x i8] ; <[2 x i8]*> [#uses=0] +@.str12 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str13 = external constant [2 x i8] ; <[2 x i8]*> [#uses=0] +@.str14 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str15 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str16 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str17 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str18 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str19 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str20 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str21 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str22 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str23 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str24 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str25 = external constant [6 x i8] ; <[6 x i8]*> [#uses=0] +@.str26 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str27 = external constant [6 x i8] ; <[6 x i8]*> [#uses=0] +@r = external global [17 x i32] ; <[17 x i32]*> [#uses=0] +@.str28 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str29 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@pgm = external global [5 x { i32, [3 x i32] }] ; <[5 x { i32, [3 x i32] }]*> [#uses=4] +@.str30 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str31 = external constant [13 x i8] ; <[13 x i8]*> [#uses=0] +@.str32 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str33 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str34 = external constant [20 x i8] ; <[20 x i8]*> [#uses=0] +@numi = external global i32 ; [#uses=7] +@.str35 = external constant [10 x i8] ; <[10 x i8]*> [#uses=0] +@counter = external global [5 x i32] ; <[5 x i32]*> 
[#uses=2] +@itrialx.2510 = external global i32 ; [#uses=0] +@.str36 = external constant [43 x i8] ; <[43 x i8]*> [#uses=0] +@.str37 = external constant [42 x i8] ; <[42 x i8]*> [#uses=0] +@corr_result = external global i32 ; [#uses=0] +@.str38 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str39 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str40 = external constant [47 x i8] ; <[47 x i8]*> [#uses=0] +@correct_result = external global [17 x i32] ; <[17 x i32]*> [#uses=1] +@.str41 = external constant [46 x i8] ; <[46 x i8]*> [#uses=0] +@.str42 = external constant [32 x i8] ; <[32 x i8]*> [#uses=0] +@.str43 = external constant [44 x i8] ; <[44 x i8]*> [#uses=1] +@.str44 = external constant [21 x i8] ; <[21 x i8]*> [#uses=1] +@.str45 = external constant [12 x i8] ; <[12 x i8]*> [#uses=1] +@.str46 = external constant [5 x i8] ; <[5 x i8]*> [#uses=1] +@.str47 = external constant [12 x i8] ; <[12 x i8]*> [#uses=1] + +declare i32 @neg(i32, i32, i32) + +declare i32 @Not(i32, i32, i32) + +declare i32 @pop(i32, i32, i32) + +declare i32 @nlz(i32, i32, i32) + +declare i32 @rev(i32, i32, i32) + +declare i32 @add(i32, i32, i32) + +declare i32 @sub(i32, i32, i32) + +declare i32 @mul(i32, i32, i32) + +declare i32 @divide(i32, i32, i32) + +declare i32 @divu(i32, i32, i32) + +declare i32 @And(i32, i32, i32) + +declare i32 @Or(i32, i32, i32) + +declare i32 @Xor(i32, i32, i32) + +declare i32 @rotl(i32, i32, i32) + +declare i32 @shl(i32, i32, i32) + +declare i32 @shr(i32, i32, i32) + +declare i32 @shrs(i32, i32, i32) + +declare i32 @cmpeq(i32, i32, i32) + +declare i32 @cmplt(i32, i32, i32) + +declare i32 @cmpltu(i32, i32, i32) + +declare i32 @seleq(i32, i32, i32) + +declare i32 @sellt(i32, i32, i32) + +declare i32 @selle(i32, i32, i32) + +declare void @print_expr(i32) + +declare i32 @printf(i8*, ...) + +declare i32 @putchar(i32) + +declare void @print_pgm() + +declare void @simulate_one_instruction(i32) + +declare i32 @check(i32) + +declare i32 @puts(i8*) + +declare void @fix_operands(i32) + +declare void @abort() + +declare i32 @increment() + +declare i32 @search() + +define i32 @main(i32 %argc, i8** %argv) { +entry: + %argc_addr = alloca i32 ; [#uses=1] + %argv_addr = alloca i8** ; [#uses=1] + %retval = alloca i32, align 4 ; [#uses=2] + %tmp = alloca i32, align 4 ; [#uses=2] + %i = alloca i32, align 4 ; [#uses=21] + %num_sol = alloca i32, align 4 ; [#uses=4] + %total = alloca i32, align 4 ; [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + store i32 %argc, i32* %argc_addr + store i8** %argv, i8*** %argv_addr + store i32 0, i32* %num_sol + store i32 1, i32* @numi + br label %bb91 + +bb: ; preds = %cond_next97 + %tmp1 = load i32, i32* @numi ; [#uses=1] + %tmp2 = getelementptr [44 x i8], [44 x i8]* @.str43, i32 0, i32 0 ; [#uses=1] + %tmp3 = call i32 (i8*, ...) 
@printf( i8* %tmp2, i32 %tmp1 ) ; [#uses=0] + store i32 0, i32* %i + br label %bb13 + +bb4: ; preds = %bb13 + %tmp5 = load i32, i32* %i ; [#uses=1] + %tmp6 = load i32, i32* %i ; [#uses=1] + %tmp7 = getelementptr [17 x i32], [17 x i32]* @trialx, i32 0, i32 %tmp6 ; [#uses=1] + %tmp8 = load i32, i32* %tmp7 ; [#uses=1] + %tmp9 = call i32 @userfun( i32 %tmp8 ) ; [#uses=1] + %tmp10 = getelementptr [17 x i32], [17 x i32]* @correct_result, i32 0, i32 %tmp5 ; [#uses=1] + store i32 %tmp9, i32* %tmp10 + %tmp11 = load i32, i32* %i ; [#uses=1] + %tmp12 = add i32 %tmp11, 1 ; [#uses=1] + store i32 %tmp12, i32* %i + br label %bb13 + +bb13: ; preds = %bb4, %bb + %tmp14 = load i32, i32* %i ; [#uses=1] + %tmp15 = icmp sle i32 %tmp14, 16 ; [#uses=1] + %tmp1516 = zext i1 %tmp15 to i32 ; [#uses=1] + %toBool = icmp ne i32 %tmp1516, 0 ; [#uses=1] + br i1 %toBool, label %bb4, label %bb17 + +bb17: ; preds = %bb13 + store i32 0, i32* %i + br label %bb49 + +bb18: ; preds = %bb49 + %tmp19 = load i32, i32* %i ; [#uses=1] + %tmp20 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp19 ; <{ i32, [3 x i32] }*> [#uses=1] + %tmp21 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp20, i32 0, i32 0 ; [#uses=1] + store i32 0, i32* %tmp21 + %tmp22 = load i32, i32* %i ; [#uses=1] + %tmp23 = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 0 ; <%struct.anon*> [#uses=1] + %tmp24 = getelementptr %struct.anon, %struct.anon* %tmp23, i32 0, i32 3 ; <[3 x i32]*> [#uses=1] + %tmp25 = getelementptr [3 x i32], [3 x i32]* %tmp24, i32 0, i32 0 ; [#uses=1] + %tmp26 = load i32, i32* %tmp25 ; [#uses=1] + %tmp27 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp22 ; <{ i32, [3 x i32] }*> [#uses=1] + %tmp28 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp27, i32 0, i32 1 ; <[3 x i32]*> [#uses=1] + %tmp29 = getelementptr [3 x i32], [3 x i32]* %tmp28, i32 0, i32 0 ; [#uses=1] + store i32 %tmp26, i32* %tmp29 + %tmp30 = load i32, i32* %i ; [#uses=1] + %tmp31 = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 0 ; <%struct.anon*> [#uses=1] + %tmp32 = getelementptr %struct.anon, %struct.anon* %tmp31, i32 0, i32 3 ; <[3 x i32]*> [#uses=1] + %tmp33 = getelementptr [3 x i32], [3 x i32]* %tmp32, i32 0, i32 1 ; [#uses=1] + %tmp34 = load i32, i32* %tmp33 ; [#uses=1] + %tmp35 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp30 ; <{ i32, [3 x i32] }*> [#uses=1] + %tmp36 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp35, i32 0, i32 1 ; <[3 x i32]*> [#uses=1] + %tmp37 = getelementptr [3 x i32], [3 x i32]* %tmp36, i32 0, i32 1 ; [#uses=1] + store i32 %tmp34, i32* %tmp37 + %tmp38 = load i32, i32* %i ; [#uses=1] + %tmp39 = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 0 ; <%struct.anon*> [#uses=1] + %tmp40 = getelementptr %struct.anon, %struct.anon* %tmp39, i32 0, i32 3 ; <[3 x i32]*> [#uses=1] + %tmp41 = getelementptr [3 x i32], [3 x i32]* %tmp40, i32 0, i32 2 ; [#uses=1] + %tmp42 = load i32, i32* %tmp41 ; [#uses=1] + %tmp43 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp38 ; <{ i32, [3 x i32] }*> [#uses=1] + %tmp44 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp43, i32 0, i32 1 ; <[3 x i32]*> [#uses=1] + %tmp45 = getelementptr [3 x i32], [3 x i32]* %tmp44, i32 0, i32 2 ; [#uses=1] + store i32 %tmp42, i32* %tmp45 + %tmp46 = load i32, i32* %i ; [#uses=1] + call void @fix_operands( i32 
%tmp46 ) + %tmp47 = load i32, i32* %i ; [#uses=1] +; CHECK: %tmp47 = phi i32 [ %tmp48, %bb18 ], [ 0, %bb17 ] + %tmp48 = add i32 %tmp47, 1 ; [#uses=1] + store i32 %tmp48, i32* %i + br label %bb49 + +bb49: ; preds = %bb18, %bb17 + %tmp50 = load i32, i32* @numi ; [#uses=1] + %tmp51 = load i32, i32* %i ; [#uses=1] + %tmp52 = icmp slt i32 %tmp51, %tmp50 ; [#uses=1] + %tmp5253 = zext i1 %tmp52 to i32 ; [#uses=1] + %toBool54 = icmp ne i32 %tmp5253, 0 ; [#uses=1] + br i1 %toBool54, label %bb18, label %bb55 + +bb55: ; preds = %bb49 + %tmp56 = call i32 @search( ) ; [#uses=1] + store i32 %tmp56, i32* %num_sol + %tmp57 = getelementptr [21 x i8], [21 x i8]* @.str44, i32 0, i32 0 ; [#uses=1] + %tmp58 = load i32, i32* %num_sol ; [#uses=1] + %tmp59 = call i32 (i8*, ...) @printf( i8* %tmp57, i32 %tmp58 ) ; [#uses=0] + %tmp60 = load i32, i32* @counters ; [#uses=1] + %tmp61 = icmp ne i32 %tmp60, 0 ; [#uses=1] + %tmp6162 = zext i1 %tmp61 to i32 ; [#uses=1] + %toBool63 = icmp ne i32 %tmp6162, 0 ; [#uses=1] + br i1 %toBool63, label %cond_true, label %cond_next + +cond_true: ; preds = %bb55 + store i32 0, i32* %total + %tmp64 = getelementptr [12 x i8], [12 x i8]* @.str45, i32 0, i32 0 ; [#uses=1] + %tmp65 = call i32 (i8*, ...) @printf( i8* %tmp64 ) ; [#uses=0] + store i32 0, i32* %i + br label %bb79 + +bb66: ; preds = %bb79 + %tmp67 = load i32, i32* %i ; [#uses=1] + %tmp68 = getelementptr [5 x i32], [5 x i32]* @counter, i32 0, i32 %tmp67 ; [#uses=1] + %tmp69 = load i32, i32* %tmp68 ; [#uses=1] + %tmp70 = getelementptr [5 x i8], [5 x i8]* @.str46, i32 0, i32 0 ; [#uses=1] + %tmp71 = call i32 (i8*, ...) @printf( i8* %tmp70, i32 %tmp69 ) ; [#uses=0] + %tmp72 = load i32, i32* %i ; [#uses=1] + %tmp73 = getelementptr [5 x i32], [5 x i32]* @counter, i32 0, i32 %tmp72 ; [#uses=1] + %tmp74 = load i32, i32* %tmp73 ; [#uses=1] + %tmp75 = load i32, i32* %total ; [#uses=1] + %tmp76 = add i32 %tmp74, %tmp75 ; [#uses=1] + store i32 %tmp76, i32* %total + %tmp77 = load i32, i32* %i ; [#uses=1] + %tmp78 = add i32 %tmp77, 1 ; [#uses=1] + store i32 %tmp78, i32* %i + br label %bb79 + +bb79: ; preds = %bb66, %cond_true + %tmp80 = load i32, i32* @numi ; [#uses=1] + %tmp81 = load i32, i32* %i ; [#uses=1] + %tmp82 = icmp slt i32 %tmp81, %tmp80 ; [#uses=1] + %tmp8283 = zext i1 %tmp82 to i32 ; [#uses=1] + %toBool84 = icmp ne i32 %tmp8283, 0 ; [#uses=1] + br i1 %toBool84, label %bb66, label %bb85 + +bb85: ; preds = %bb79 + %tmp86 = getelementptr [12 x i8], [12 x i8]* @.str47, i32 0, i32 0 ; [#uses=1] + %tmp87 = load i32, i32* %total ; [#uses=1] + %tmp88 = call i32 (i8*, ...) 
@printf( i8* %tmp86, i32 %tmp87 ) ; [#uses=0] + br label %cond_next + +cond_next: ; preds = %bb85, %bb55 + %tmp89 = load i32, i32* @numi ; [#uses=1] + %tmp90 = add i32 %tmp89, 1 ; [#uses=1] + store i32 %tmp90, i32* @numi + br label %bb91 + +bb91: ; preds = %cond_next, %entry + %tmp92 = load i32, i32* @numi ; [#uses=1] + %tmp93 = icmp sgt i32 %tmp92, 5 ; [#uses=1] + %tmp9394 = zext i1 %tmp93 to i32 ; [#uses=1] + %toBool95 = icmp ne i32 %tmp9394, 0 ; [#uses=1] + br i1 %toBool95, label %cond_true96, label %cond_next97 + +cond_true96: ; preds = %bb91 + br label %bb102 + +cond_next97: ; preds = %bb91 + %tmp98 = load i32, i32* %num_sol ; [#uses=1] + %tmp99 = icmp eq i32 %tmp98, 0 ; [#uses=1] + %tmp99100 = zext i1 %tmp99 to i32 ; [#uses=1] + %toBool101 = icmp ne i32 %tmp99100, 0 ; [#uses=1] + br i1 %toBool101, label %bb, label %bb102 + +bb102: ; preds = %cond_next97, %cond_true96 + store i32 0, i32* %tmp + %tmp103 = load i32, i32* %tmp ; [#uses=1] + store i32 %tmp103, i32* %retval + br label %return + +return: ; preds = %bb102 + %retval104 = load i32, i32* %retval ; [#uses=1] + ret i32 %retval104 +} + +declare i32 @userfun(i32) diff --git a/llvm/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll b/llvm/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll new file mode 100644 index 0000000..3b59bad --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +@img_width = external global i16 ; [#uses=2] + +define i32 @smpUMHEXBipredIntegerPelBlockMotionSearch(i16* %cur_pic, i16 signext %ref, i32 %list, i32 %pic_pix_x, i32 %pic_pix_y, i32 %blocktype, i16 signext %pred_mv_x1, i16 signext %pred_mv_y1, i16 signext %pred_mv_x2, i16 signext %pred_mv_y2, i16* %mv_x, i16* %mv_y, i16* %s_mv_x, i16* %s_mv_y, i32 %search_range, i32 %min_mcost, i32 %lambda_factor) { +cond_next143: ; preds = %entry + store i16 0, i16* @img_width, align 2 + br i1 false, label %cond_next449, label %cond_false434 + +cond_false434: ; preds = %cond_true415 + br label %cond_next449 + +cond_next449: ; preds = %cond_false434, %cond_true415 + br i1 false, label %cond_next698, label %cond_false470 + +cond_false470: ; preds = %cond_next449 + br label %cond_next698 + +cond_next698: ; preds = %cond_true492 + %tmp701 = load i16, i16* @img_width, align 2 ; [#uses=0] +; CHECK-NOT: %tmp701 = + ret i32 0 +} diff --git a/llvm/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll b/llvm/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll new file mode 100644 index 0000000..ee30cfc --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll @@ -0,0 +1,22 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s +; PR1996 + +%struct.anon = type { i32, i8, i8, i8, i8 } + +define i32 @a() { +entry: + %c = alloca %struct.anon ; <%struct.anon*> [#uses=2] + %tmp = getelementptr %struct.anon, %struct.anon* %c, i32 0, i32 0 ; [#uses=1] + %tmp1 = getelementptr i32, i32* %tmp, i32 1 ; [#uses=2] + %tmp2 = load i32, i32* %tmp1, align 4 ; [#uses=1] +; CHECK-NOT: load + %tmp3 = or i32 %tmp2, 11 ; [#uses=1] + %tmp4 = and i32 %tmp3, -21 ; [#uses=1] + store i32 %tmp4, i32* %tmp1, align 4 + %call = call i32 (...) @x( %struct.anon* %c ) ; [#uses=0] + ret i32 undef +} + + +declare i32 @x(...) 
diff --git a/llvm/test/Transforms/NewGVN/2008-02-13-NewPHI.ll b/llvm/test/Transforms/NewGVN/2008-02-13-NewPHI.ll new file mode 100644 index 0000000..5d60382 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2008-02-13-NewPHI.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -newgvn +; PR2032 + +define i32 @sscal(i32 %n, double %sa1, float* %sx, i32 %incx) { +entry: + %sx_addr = alloca float* ; [#uses=3] + store float* %sx, float** %sx_addr, align 4 + br label %bb33 + +bb: ; preds = %bb33 + %tmp27 = load float*, float** %sx_addr, align 4 ; [#uses=1] + store float 0.000000e+00, float* %tmp27, align 4 + store float* null, float** %sx_addr, align 4 + br label %bb33 + +bb33: ; preds = %bb, %entry + br i1 false, label %bb, label %return + +return: ; preds = %bb33 + %retval59 = load i32, i32* null, align 4 ; [#uses=1] + ret i32 %retval59 +} diff --git a/llvm/test/Transforms/NewGVN/2008-07-02-Unreachable.ll b/llvm/test/Transforms/NewGVN/2008-07-02-Unreachable.ll new file mode 100644 index 0000000..797cf57 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2008-07-02-Unreachable.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +; PR2503 + +@g_3 = external global i8 ; [#uses=2] + +define i8 @func_1(i32 %x, i32 %y) nounwind { +entry: + %A = alloca i8 + %cmp = icmp eq i32 %x, %y + br i1 %cmp, label %ifelse, label %ifthen + +ifthen: ; preds = %entry + br label %ifend + +ifelse: ; preds = %entry + %tmp3 = load i8, i8* @g_3 ; [#uses=0] + store i8 %tmp3, i8* %A + br label %afterfor + +forcond: ; preds = %forinc + br i1 false, label %afterfor, label %forbody + +forbody: ; preds = %forcond + br label %forinc + +forinc: ; preds = %forbody + br label %forcond + +afterfor: ; preds = %forcond, %forcond.thread + %tmp10 = load i8, i8* @g_3 ; [#uses=0] + ret i8 %tmp10 +; CHECK: ret i8 %tmp3 + +ifend: ; preds = %afterfor, %ifthen + ret i8 0 +} diff --git a/llvm/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll b/llvm/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll new file mode 100644 index 0000000..c1b5cc8 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -newgvn -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.5" + %struct.anon = type { i8*, i32 } + %struct.d_print_info = type { i32, i8*, i32, i32, %struct.d_print_template*, %struct.d_print_mod*, i32 } + %struct.d_print_mod = type { %struct.d_print_mod*, %struct.demangle_component*, i32, %struct.d_print_template* } + %struct.d_print_template = type { %struct.d_print_template*, %struct.demangle_component* } + %struct.demangle_component = type { i32, { %struct.anon } } + +define void @d_print_mod_list(%struct.d_print_info* %dpi, %struct.d_print_mod* %mods, i32 %suffix) nounwind { +entry: + %0 = getelementptr %struct.d_print_info, %struct.d_print_info* %dpi, i32 0, i32 1 ; [#uses=1] + br i1 false, label %return, label %bb + +bb: ; preds = %entry + %1 = load i8*, i8** %0, align 4 ; [#uses=0] + %2 = getelementptr %struct.d_print_info, %struct.d_print_info* %dpi, i32 0, i32 1 ; [#uses=0] + br label %bb21 + +bb21: ; preds = %bb21, %bb + br label %bb21 + +return: ; preds = %entry + ret void +} + +; CHECK: define void @d_print_mod_list(%struct.d_print_info* %dpi, %struct.d_print_mod* %mods, i32 %suffix) #0 { +; CHECK: entry: +; CHECK: %0 = getelementptr %struct.d_print_info, %struct.d_print_info* %dpi, i32 0, i32 1 +; CHECK: br i1 false, label %return, label %bb +; 
CHECK: bb: +; CHECK: br label %bb21 +; CHECK: bb21: +; CHECK: br label %bb21 +; CHECK: return: +; CHECK: ret void +; CHECK: } diff --git a/llvm/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll b/llvm/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll new file mode 100644 index 0000000..54644ada --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -newgvn | llvm-dis +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: + br label %bb84 + +bb41: ; preds = %bb82 + %tmp = load i8, i8* %opt.0, align 1 ; [#uses=0] + %tmp1 = getelementptr i8, i8* %opt.0, i32 1 ; [#uses=2] + switch i32 0, label %bb81 [ + i32 102, label %bb82 + i32 110, label %bb79 + i32 118, label %bb80 + ] + +bb79: ; preds = %bb41 + br label %bb82 + +bb80: ; preds = %bb41 + ret i32 0 + +bb81: ; preds = %bb41 + ret i32 1 + +bb82: ; preds = %bb84, %bb79, %bb41 + %opt.0 = phi i8* [ %tmp3, %bb84 ], [ %tmp1, %bb79 ], [ %tmp1, %bb41 ] ; [#uses=3] + %tmp2 = load i8, i8* %opt.0, align 1 ; [#uses=0] + br i1 false, label %bb84, label %bb41 + +bb84: ; preds = %bb82, %entry + %tmp3 = getelementptr i8, i8* null, i32 1 ; [#uses=1] + br label %bb82 +} diff --git a/llvm/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll b/llvm/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll new file mode 100644 index 0000000..44cbdee --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -newgvn | llvm-dis +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" +@sort_value = external global [256 x i32], align 32 ; <[256 x i32]*> [#uses=2] + +define i32 @Quiesce(i32 %alpha, i32 %beta, i32 %wtm, i32 %ply) nounwind { +entry: + br label %bb22 + +bb22: ; preds = %bb23, %bb22, %entry + br i1 false, label %bb23, label %bb22 + +bb23: ; preds = %bb23, %bb22 + %sortv.233 = phi i32* [ getelementptr ([256 x i32], [256 x i32]* @sort_value, i32 0, i32 0), %bb22 ], [ %sortv.2, %bb23 ] ; [#uses=1] + %0 = load i32, i32* %sortv.233, align 4 ; [#uses=0] + %sortv.2 = getelementptr [256 x i32], [256 x i32]* @sort_value, i32 0, i32 0 ; [#uses=1] + br i1 false, label %bb23, label %bb22 +} diff --git a/llvm/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll b/llvm/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll new file mode 100644 index 0000000..6a6c0d9 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -newgvn | llvm-dis +; Cached results must be added to and verified against the visited sets. 
+; PR3217 + +define fastcc void @gen_field_die(i32* %decl) nounwind { +entry: + br i1 false, label %bb203, label %bb202 + +bb202: ; preds = %entry + unreachable + +bb203: ; preds = %entry + %tmp = getelementptr i32, i32* %decl, i32 1 ; [#uses=1] + %tmp1 = load i32, i32* %tmp, align 4 ; [#uses=0] + br i1 false, label %bb207, label %bb204 + +bb204: ; preds = %bb203 + %tmp2 = getelementptr i32, i32* %decl, i32 1 ; [#uses=1] + br label %bb208 + +bb207: ; preds = %bb203 + br label %bb208 + +bb208: ; preds = %bb207, %bb204 + %iftmp.1374.0.in = phi i32* [ null, %bb207 ], [ %tmp2, %bb204 ] ; [#uses=1] + %iftmp.1374.0 = load i32, i32* %iftmp.1374.0.in ; [#uses=0] + unreachable +} diff --git a/llvm/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll b/llvm/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll new file mode 100644 index 0000000..07cdd4e --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -newgvn | llvm-dis +; PR3358 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + %struct.re_pattern_buffer = type { i8*, i64, i64, i64, i8*, i8*, i64, i8 } + %struct.re_registers = type { i32, i32*, i32* } + +define fastcc i32 @byte_re_match_2_internal(%struct.re_pattern_buffer* nocapture %bufp, i8* %string1, i32 %size1, i8* %string2, i32 %size2, i32 %pos, %struct.re_registers* %regs, i32 %stop) nounwind { +entry: + br label %bb159 + +succeed_label: ; preds = %bb159 + ret i32 0 + +bb159: ; preds = %bb664, %bb554, %bb159, %bb159, %bb159, %entry + %d.0 = phi i8* [ null, %entry ], [ %d.0, %bb159 ], [ %d.0, %bb554 ], [ %d.0, %bb159 ], [ %d.0, %bb159 ], [ %d.12, %bb664 ] ; [#uses=5] + switch i32 0, label %bb661 [ + i32 0, label %bb159 + i32 1, label %succeed_label + i32 13, label %bb159 + i32 14, label %bb159 + i32 16, label %bb411 + i32 24, label %bb622 + i32 28, label %bb543 + ] + +bb411: ; preds = %bb411, %bb159 + br label %bb411 + +bb543: ; preds = %bb159 + br i1 false, label %bb549, label %bb550 + +bb549: ; preds = %bb543 + br label %bb554 + +bb550: ; preds = %bb543 + br i1 false, label %bb554, label %bb552 + +bb552: ; preds = %bb550 + %0 = load i8, i8* %d.0, align 8 ; [#uses=0] + br label %bb554 + +bb554: ; preds = %bb552, %bb550, %bb549 + br i1 false, label %bb159, label %bb661 + +bb622: ; preds = %bb622, %bb159 + br label %bb622 + +bb661: ; preds = %bb554, %bb159 + %d.12 = select i1 false, i8* null, i8* null ; [#uses=1] + br label %bb664 + +bb664: ; preds = %bb664, %bb661 + br i1 false, label %bb159, label %bb664 +} diff --git a/llvm/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll b/llvm/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll new file mode 100644 index 0000000..d02c5f2 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll @@ -0,0 +1,100 @@ +; RUN: opt < %s -newgvn | llvm-dis + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + %struct..4sPragmaType = type { i8*, i32 } + %struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 } + %struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* } + %struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 } + %struct.AuxData = type { i8*, void (i8*)* } + %struct.Bitvec 
= type { i32, i32, i32, { [125 x i32] } } + %struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* } + %struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* } + %struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* } + %struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* } + %struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] } + %struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 } + %struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 } + %struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* } + %struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 } + %struct.Context = type { i64, i32, %struct.Fifo } + %struct.CountCtx = type { i64 } + %struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* } + %struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* } + %struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..4sPragmaType, %struct..4sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 } + %struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* } + %struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 } + %struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 } + %struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* } + %struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] } + %struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] } + %struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* } + %struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 } + %struct.IdList = type { %struct..4sPragmaType*, i32, i32 } + %struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** } + %struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] } + %struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* } + %struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.PgHdr*, i32, %struct.MemPage* } + %struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* } + %struct.Op = type { i8, i8, i8, i8, i32, i32, i32, { i32 } } + %struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, 
%struct.BusyHandler*, %struct.PagerLruList, %struct.PgHdr*, %struct.PgHdr*, %struct.PgHdr*, i64, i64, i64, i64, i64, i32, void (%struct.PgHdr*, i32)*, void (%struct.PgHdr*, i32)*, i32, %struct.PgHdr**, i8*, [16 x i8] } + %struct.PagerLruLink = type { %struct.PgHdr*, %struct.PgHdr* } + %struct.PagerLruList = type { %struct.PgHdr*, %struct.PgHdr*, %struct.PgHdr* } + %struct.Parse = type { %struct.sqlite3*, i32, i8*, %struct.Vdbe*, i8, i8, i8, i8, i8, i8, i8, [8 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x i32], i32, %struct.TableLock*, i32, i32, i32, i32, i32, %struct.Expr**, i8, %struct..4sPragmaType, %struct..4sPragmaType, %struct..4sPragmaType, i8*, i8*, %struct.Table*, %struct.Trigger*, %struct.TriggerStack*, i8*, %struct..4sPragmaType, i8, %struct.Table*, i32 } + %struct.PgHdr = type { %struct.Pager*, i32, %struct.PgHdr*, %struct.PgHdr*, %struct.PagerLruLink, %struct.PgHdr*, i8, i8, i8, i8, i8, i16, %struct.PgHdr*, %struct.PgHdr*, i8* } + %struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* } + %struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] } + %struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] } + %struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 } + %struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* } + %struct.TableLock = type { i32, i32, i8, i8* } + %struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..4sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* } + %struct.TriggerStack = type { %struct.Table*, i32, i32, i32, i32, i32, i32, %struct.Trigger*, %struct.TriggerStack* } + %struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..4sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* } + %struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 } + %struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] } + %struct._OvflCell = type { i8*, i16 } + %struct._ht = type { i32, %struct.HashElem* } + %struct.anon = type { double } + %struct.sColMap = type { i32, i8* } + %struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %struct.anon, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, 
i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 } + %struct.sqlite3InitInfo = type { i32, i32, i8 } + %struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* } + %struct.sqlite3_file = type { %struct.sqlite3_io_methods* } + %struct.sqlite3_index_constraint = type { i32, i8, i8, i32 } + %struct.sqlite3_index_constraint_usage = type { i32, i8 } + %struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double } + %struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* } + %struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* } + %struct.sqlite3_mutex = type opaque + %struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* } + %struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* } + %struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* } + +define fastcc void @sqlite3Insert(%struct.Parse* %pParse, %struct.SrcList* %pTabList, %struct.ExprList* %pList, %struct.Select* %pSelect, %struct.IdList* %pColumn, i32 %onError) nounwind { +entry: + br i1 false, label %bb54, label %bb69.loopexit + +bb54: ; preds = %entry + br label %bb69.loopexit + +bb59: ; preds = %bb63.preheader + %0 = load %struct..4sPragmaType*, %struct..4sPragmaType** %3, align 4 ; <%struct..4sPragmaType*> [#uses=0] + br label %bb65 + +bb65: ; preds = %bb63.preheader, %bb59 + %1 = load %struct..4sPragmaType*, %struct..4sPragmaType** %4, align 4 ; <%struct..4sPragmaType*> [#uses=0] + br i1 false, label %bb67, label %bb63.preheader + +bb67: ; preds = %bb65 
+ %2 = getelementptr %struct.IdList, %struct.IdList* %pColumn, i32 0, i32 0 ; <%struct..4sPragmaType**> [#uses=0] + unreachable + +bb69.loopexit: ; preds = %bb54, %entry + %3 = getelementptr %struct.IdList, %struct.IdList* %pColumn, i32 0, i32 0 ; <%struct..4sPragmaType**> [#uses=1] + %4 = getelementptr %struct.IdList, %struct.IdList* %pColumn, i32 0, i32 0 ; <%struct..4sPragmaType**> [#uses=1] + br label %bb63.preheader + +bb63.preheader: ; preds = %bb69.loopexit, %bb65 + br i1 false, label %bb59, label %bb65 +} diff --git a/llvm/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll b/llvm/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll new file mode 100644 index 0000000..701556e --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll @@ -0,0 +1,110 @@ +; RUN: opt < %s -newgvn -disable-output +; PR3775 + +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + %llvm.dbg.anchor.type = type { i32, i32 } + %"struct.__gnu_cxx::hash" = type <{ i8 }> + %struct.__sched_param = type { i32 } + %struct._pthread_descr_struct = type opaque + %struct.pthread_attr_t = type { i32, i32, %struct.__sched_param, i32, i32, i32, i32, i8*, i32 } + %struct.pthread_mutex_t = type { i32, i32, %struct._pthread_descr_struct*, i32, %llvm.dbg.anchor.type } + %"struct.std::_Rb_tree > >,std::_Select1st > > >,std::less,std::allocator > > > >" = type { %"struct.std::_Rb_tree > >,std::_Select1st > > >,std::less,std::allocator > > > >::_Rb_tree_impl,false>" } + %"struct.std::_Rb_tree > >,std::_Select1st > > >,std::less,std::allocator > > > >::_Rb_tree_impl,false>" = type { %"struct.__gnu_cxx::hash", %"struct.std::_Rb_tree_node_base", i32 } + %"struct.std::_Rb_tree_iterator > > >" = type { %"struct.std::_Rb_tree_node_base"* } + %"struct.std::_Rb_tree_node_base" = type { i32, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } + %"struct.std::pair > > >,bool>" = type { %"struct.std::_Rb_tree_iterator > > >", i8 } + %"struct.std::pair" = type { i8*, i8* } + +@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*), i32 (i32*, void ()*)* @pthread_once ; [#uses=0] +@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32), i8* (i32)* @pthread_getspecific ; [#uses=0] +@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*), i32 (i32, i8*)* @pthread_setspecific ; [#uses=0] +@_ZL22__gthrw_pthread_createPmPK16__pthread_attr_sPFPvS3_ES3_ = weak alias i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*), i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; [#uses=0] +@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i32), i32 (i32)* @pthread_cancel ; [#uses=0] +@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; [#uses=0] +@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; [#uses=0] +@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; [#uses=0] +@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%struct.pthread_mutex_t*, %struct.__sched_param*), i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)* @pthread_mutex_init ; 
[#uses=0] +@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*), i32 (i32*, void (i8*)*)* @pthread_key_create ; [#uses=0] +@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32), i32 (i32)* @pthread_key_delete ; [#uses=0] +@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*), i32 (%struct.__sched_param*)* @pthread_mutexattr_init ; [#uses=0] +@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%struct.__sched_param*, i32), i32 (%struct.__sched_param*, i32)* @pthread_mutexattr_settype ; [#uses=0] +@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*), i32 (%struct.__sched_param*)* @pthread_mutexattr_destroy ; [#uses=0] + +declare fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind readnone + +define fastcc void @_ZNSt8_Rb_treeIPvSt4pairIKS0_S0_ESt10_Select1stIS3_ESt4lessIS0_ESaIS3_EE16_M_insert_uniqueERKS3_(%"struct.std::pair > > >,bool>"* noalias nocapture sret %agg.result, %"struct.std::_Rb_tree > >,std::_Select1st > > >,std::less,std::allocator > > > >"* %this, %"struct.std::pair"* %__v) nounwind { +entry: + br i1 false, label %bb7, label %bb + +bb: ; preds = %bb, %entry + br i1 false, label %bb5, label %bb + +bb5: ; preds = %bb + call fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind + br i1 false, label %bb11, label %bb7 + +bb7: ; preds = %bb5, %entry + br label %bb11 + +bb11: ; preds = %bb7, %bb5 + call fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind + unreachable +} + +define i32 @pthread_once(i32*, void ()*) { + ret i32 0 +} + +define i8* @pthread_getspecific(i32) { + ret i8* null +} + +define i32 @pthread_setspecific(i32, i8*) { + ret i32 0 +} + +define i32 @pthread_create(i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*) { + ret i32 0 +} + +define i32 @pthread_cancel(i32) { + ret i32 0 +} + +define i32 @pthread_mutex_lock(%struct.pthread_mutex_t*) { + ret i32 0 +} + +define i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*) { + ret i32 0 +} + +define i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*) { + ret i32 0 +} + +define i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct.__sched_param*) { + ret i32 0 +} + +define i32 @pthread_key_create(i32*, void (i8*)*) { + ret i32 0 +} + +define i32 @pthread_key_delete(i32) { + ret i32 0 +} + +define i32 @pthread_mutexattr_init(%struct.__sched_param*) { + ret i32 0 +} + +define i32 @pthread_mutexattr_settype(%struct.__sched_param*, i32) { + ret i32 0 +} + +define i32 @pthread_mutexattr_destroy(%struct.__sched_param*) { + ret i32 0 +} diff --git a/llvm/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll b/llvm/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll new file mode 100644 index 0000000..e95c1ae --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll @@ -0,0 +1,67 @@ +; RUN: opt < %s -newgvn | llvm-dis +; PR4256 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + %llvm.dbg.anchor.type = type { i32, i32 } + %struct.cset = type { i8*, i8, i8, i32, i8* } + %struct.lmat = type { %struct.re_guts*, i32, %llvm.dbg.anchor.type*, i8*, i8*, i8*, i8*, i8**, i32, i8*, i8*, i8*, i8*, i8* } + %struct.re_guts = type { i32*, %struct.cset*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, [1 x i8] } + +define i8* @lbackref(%struct.lmat* %m, i8* %start, i8* %stop, 
i32 %startst, i32 %stopst, i32 %lev, i32 %rec) nounwind { +entry: + br label %bb63 + +bb: ; preds = %bb63 + switch i32 0, label %bb62 [ + i32 268435456, label %bb2 + i32 805306368, label %bb9 + i32 -1610612736, label %bb51 + ] + +bb2: ; preds = %bb + br label %bb62 + +bb9: ; preds = %bb + %0 = load i8, i8* %sp.1, align 1 ; [#uses=0] + br label %bb62 + +bb51: ; preds = %bb + %1 = load i8, i8* %sp.1, align 1 ; [#uses=0] + ret i8* null + +bb62: ; preds = %bb9, %bb2, %bb + br label %bb63 + +bb63: ; preds = %bb84, %bb69, %bb62, %entry + %sp.1 = phi i8* [ null, %bb62 ], [ %sp.1.lcssa, %bb84 ], [ %start, %entry ], [ %sp.1.lcssa, %bb69 ] ; [#uses=3] + br i1 false, label %bb, label %bb65 + +bb65: ; preds = %bb63 + %sp.1.lcssa = phi i8* [ %sp.1, %bb63 ] ; [#uses=4] + br i1 false, label %bb66, label %bb69 + +bb66: ; preds = %bb65 + ret i8* null + +bb69: ; preds = %bb65 + switch i32 0, label %bb108.loopexit2.loopexit.loopexit [ + i32 1342177280, label %bb63 + i32 1476395008, label %bb84 + i32 1879048192, label %bb104 + i32 2013265920, label %bb93 + ] + +bb84: ; preds = %bb69 + %2 = tail call i8* @lbackref(%struct.lmat* %m, i8* %sp.1.lcssa, i8* %stop, i32 0, i32 %stopst, i32 0, i32 0) nounwind ; [#uses=0] + br label %bb63 + +bb93: ; preds = %bb69 + ret i8* null + +bb104: ; preds = %bb69 + %sp.1.lcssa.lcssa33 = phi i8* [ %sp.1.lcssa, %bb69 ] ; [#uses=0] + unreachable + +bb108.loopexit2.loopexit.loopexit: ; preds = %bb69 + ret i8* null +} diff --git a/llvm/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll b/llvm/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll new file mode 100644 index 0000000..a112157 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll @@ -0,0 +1,15 @@ +; Test to make sure malloc's bitcast does not block detection of a store +; to aliased memory; GVN should not optimize away the load in this program. +; RUN: opt < %s -newgvn -S | FileCheck %s + +define i64 @test() { + %1 = tail call i8* @malloc(i64 mul (i64 4, i64 ptrtoint (i64* getelementptr (i64, i64* null, i64 1) to i64))) ; [#uses=2] + store i8 42, i8* %1 + %X = bitcast i8* %1 to i64* ; [#uses=1] + %Y = load i64, i64* %X ; [#uses=1] + ret i64 %Y +; CHECK: %Y = load i64, i64* %X +; CHECK: ret i64 %Y +} + +declare noalias i8* @malloc(i64) diff --git a/llvm/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll b/llvm/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll new file mode 100644 index 0000000..0ff1991 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +; CHECK-NOT: load +; CHECK-NOT: phi + +define i8* @cat(i8* %s1, ...) 
nounwind { +entry: + br i1 undef, label %bb, label %bb3 + +bb: ; preds = %entry + unreachable + +bb3: ; preds = %entry + store i8* undef, i8** undef, align 4 + br i1 undef, label %bb5, label %bb6 + +bb5: ; preds = %bb3 + unreachable + +bb6: ; preds = %bb3 + br label %bb12 + +bb8: ; preds = %bb12 + br i1 undef, label %bb9, label %bb10 + +bb9: ; preds = %bb8 + %0 = load i8*, i8** undef, align 4 ; [#uses=0] + %1 = load i8*, i8** undef, align 4 ; [#uses=0] + br label %bb11 + +bb10: ; preds = %bb8 + br label %bb11 + +bb11: ; preds = %bb10, %bb9 + br label %bb12 + +bb12: ; preds = %bb11, %bb6 + br i1 undef, label %bb8, label %bb13 + +bb13: ; preds = %bb12 + ret i8* undef +} diff --git a/llvm/test/Transforms/NewGVN/2010-05-08-OneBit.ll b/llvm/test/Transforms/NewGVN/2010-05-08-OneBit.ll new file mode 100644 index 0000000..d4acc188 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2010-05-08-OneBit.ll @@ -0,0 +1,67 @@ +; RUN: opt < %s -newgvn +; PR7052 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main(i32 %argc, i8** nocapture %argv) personality i32 (...)* @__gxx_personality_v0 { +entry: + %0 = getelementptr inbounds i8, i8* undef, i64 5 ; [#uses=1] + %1 = bitcast i8* %0 to i32* ; [#uses=1] + store i32 undef, i32* %1, align 1 + br i1 undef, label %k121.i.i, label %l117.i.i + +l117.i.i: ; preds = %entry + invoke fastcc void @foo() + to label %.noexc5 unwind label %landing_pad + +.noexc5: ; preds = %l117.i.i + unreachable + +k121.i.i: ; preds = %entry + br i1 undef, label %l129.i.i, label %k133.i.i + +l129.i.i: ; preds = %k121.i.i + invoke fastcc void @foo() + to label %.noexc7 unwind label %landing_pad + +.noexc7: ; preds = %l129.i.i + unreachable + +k133.i.i: ; preds = %k121.i.i + %2 = getelementptr i8, i8* undef, i64 5 ; [#uses=1] + %3 = bitcast i8* %2 to i1* ; [#uses=1] + %4 = load i1, i1* %3 ; [#uses=1] + br i1 %4, label %k151.i.i, label %l147.i.i + +l147.i.i: ; preds = %k133.i.i + invoke fastcc void @foo() + to label %.noexc10 unwind label %landing_pad + +.noexc10: ; preds = %l147.i.i + unreachable + +k151.i.i: ; preds = %k133.i.i + ret i32 0 + +landing_pad: ; preds = %l147.i.i, %l129.i.i, %l117.i.i + %exn = landingpad {i8*, i32} + cleanup + switch i32 undef, label %fin [ + i32 1, label %catch1 + i32 2, label %catch + ] + +fin: ; preds = %landing_pad + unreachable + +catch: ; preds = %landing_pad + ret i32 1 + +catch1: ; preds = %landing_pad + ret i32 2 +} + +declare fastcc void @foo() + +declare i32 @__gxx_personality_v0(...) 
diff --git a/llvm/test/Transforms/NewGVN/2010-11-13-Simplify.ll b/llvm/test/Transforms/NewGVN/2010-11-13-Simplify.ll new file mode 100644 index 0000000..635c4b8 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2010-11-13-Simplify.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +declare i32 @foo(i32) readnone + +define i1 @bar() { +; CHECK-LABEL: @bar( + %a = call i32 @foo (i32 0) readnone + %b = call i32 @foo (i32 0) readnone + %c = and i32 %a, %b + %x = call i32 @foo (i32 %a) readnone + %y = call i32 @foo (i32 %c) readnone + %z = icmp eq i32 %x, %y + ret i1 %z +; CHECK: ret i1 true +} diff --git a/llvm/test/Transforms/NewGVN/2011-04-27-phioperands.ll b/llvm/test/Transforms/NewGVN/2011-04-27-phioperands.ll new file mode 100644 index 0000000..4904c35 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2011-04-27-phioperands.ll @@ -0,0 +1,106 @@ +; RUN: opt -newgvn -disable-output < %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" + +@nuls = external global [10 x i8] + +define fastcc void @p_ere() nounwind { +entry: + br label %"" + +".i": + br i1 undef, label %".i30.i", label %doemit.exit51.i + +".i30.i": + unreachable + +doemit.exit51.i: + br label %".i" + +".i": + br i1 undef, label %".i55.i", label %doemit.exit76.i + +".i55.i": + unreachable + +doemit.exit76.i: + br label %".i" + +".i": + store i8* getelementptr inbounds ([10 x i8], [10 x i8]* @nuls, i64 0, i64 0), i8** undef, align 8 + br label %".i" + +".i": + br label %".i" + +".i": + br i1 undef, label %".i", label %".i" + +".i": + br label %".i" + +".i": + br label %".i" + +".i": + br label %".i" + +".i": + br label %".i" + +".i": + %wascaret_2.i = phi i32 [ 0, %".i" ], [ 0, %".i" ], [ 0, %".i" ], [ 0, %".i" ], [ 0, %".i" ], [ 0, %".i" ], [ 0, %doemit.exit76.i ], [ 1, %doemit.exit51.i ], [ 0, %".i" ] + %D.5496_84.i = load i8*, i8** undef, align 8 + br i1 undef, label %".i", label %"" + +".i": + br i1 undef, label %"", label %".i" + +".i": + br i1 undef, label %".i", label %".i" + +".i": + br label %".i" + +".i": + switch i32 undef, label %"" [ + i32 42, label %".i" + i32 43, label %".i" + i32 63, label %".i" + i32 123, label %".i258.i" + ] + +".i": + br i1 undef, label %".i105.i", label %doemit.exit127.i + +".i105.i": + unreachable + +doemit.exit127.i: + unreachable + +".i": + br i1 undef, label %".i157.i", label %"" + +".i157.i": + unreachable + +".i": + br label %"" + +".i258.i": + unreachable + +"": + switch i32 undef, label %".i" [ + i32 36, label %".i" + i32 94, label %".i" + i32 124, label %".i" + i32 42, label %".i" + i32 43, label %".i" + i32 46, label %".i" + i32 63, label %".i" + i32 91, label %".i" + i32 92, label %".i" + ] +} diff --git a/llvm/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll b/llvm/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll new file mode 100644 index 0000000..4b47b06 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll @@ -0,0 +1,86 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s +; + +%0 = type { i64, i1 } + +define i64 @test1(i64 %a, i64 %b) nounwind ssp { +entry: + %uadd = tail call %0 @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %uadd.0 = extractvalue %0 %uadd, 0 + %add1 = add i64 %a, %b + ret i64 %add1 +} + +; CHECK-LABEL: @test1( +; CHECK-NOT: add1 +; CHECK: ret + +define i64 @test2(i64 %a, i64 %b) nounwind ssp { +entry: + %usub = tail call %0 @llvm.usub.with.overflow.i64(i64 %a, 
i64 %b) + %usub.0 = extractvalue %0 %usub, 0 + %sub1 = sub i64 %a, %b + ret i64 %sub1 +} + +; CHECK-LABEL: @test2( +; CHECK-NOT: sub1 +; CHECK: ret + +define i64 @test3(i64 %a, i64 %b) nounwind ssp { +entry: + %umul = tail call %0 @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + %umul.0 = extractvalue %0 %umul, 0 + %mul1 = mul i64 %a, %b + ret i64 %mul1 +} + +; CHECK-LABEL: @test3( +; CHECK-NOT: mul1 +; CHECK: ret + +define i64 @test4(i64 %a, i64 %b) nounwind ssp { +entry: + %sadd = tail call %0 @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %sadd.0 = extractvalue %0 %sadd, 0 + %add1 = add i64 %a, %b + ret i64 %add1 +} + +; CHECK-LABEL: @test4( +; CHECK-NOT: add1 +; CHECK: ret + +define i64 @test5(i64 %a, i64 %b) nounwind ssp { +entry: + %ssub = tail call %0 @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %ssub.0 = extractvalue %0 %ssub, 0 + %sub1 = sub i64 %a, %b + ret i64 %sub1 +} + +; CHECK-LABEL: @test5( +; CHECK-NOT: sub1 +; CHECK: ret + +define i64 @test6(i64 %a, i64 %b) nounwind ssp { +entry: + %smul = tail call %0 @llvm.smul.with.overflow.i64(i64 %a, i64 %b) + %smul.0 = extractvalue %0 %smul, 0 + %mul1 = mul i64 %a, %b + ret i64 %mul1 +} + +; CHECK-LABEL: @test6( +; CHECK-NOT: mul1 +; CHECK: ret + +declare void @exit(i32) noreturn +declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone +declare %0 @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone +declare %0 @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone +declare %0 @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone +declare %0 @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone +declare %0 @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll b/llvm/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll new file mode 100644 index 0000000..719ce6d --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll @@ -0,0 +1,81 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +%struct.__fundamental_type_info_pseudo = type { %struct.__type_info_pseudo } +%struct.__type_info_pseudo = type { i8*, i8* } + +@_ZTIi = external constant %struct.__fundamental_type_info_pseudo +@_ZTIb = external constant %struct.__fundamental_type_info_pseudo + +declare void @_Z4barv() + +declare void @_Z7cleanupv() + +declare i32 @llvm.eh.typeid.for(i8*) nounwind readonly + +declare i8* @__cxa_begin_catch(i8*) nounwind + +declare void @__cxa_end_catch() + +declare i32 @__gxx_personality_v0(i32, i64, i8*, i8*) + +define void @_Z3foov() uwtable personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0 { +entry: + invoke void @_Z4barv() + to label %return unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } + catch %struct.__fundamental_type_info_pseudo* @_ZTIi + catch %struct.__fundamental_type_info_pseudo* @_ZTIb + catch %struct.__fundamental_type_info_pseudo* @_ZTIi + catch %struct.__fundamental_type_info_pseudo* @_ZTIb + %exc_ptr2.i = extractvalue { i8*, i32 } %0, 0 + %filter3.i = extractvalue { i8*, i32 } %0, 1 + %typeid.i = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIi to i8*)) +; CHECK: call i32 @llvm.eh.typeid.for + %1 = icmp eq i32 %filter3.i, %typeid.i + br i1 %1, label %ppad, label %next + +next: ; preds = %lpad + %typeid1.i = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIb to i8*)) +; CHECK: call i32 @llvm.eh.typeid.for + %2 = icmp eq i32 %filter3.i, %typeid1.i + br i1 %2, label %ppad2, label %next2 + +ppad: ; preds = %lpad 
+ %3 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr2.i) nounwind + tail call void @__cxa_end_catch() nounwind + br label %return + +ppad2: ; preds = %next + %D.2073_5.i = tail call i8* @__cxa_begin_catch(i8* %exc_ptr2.i) nounwind + tail call void @__cxa_end_catch() nounwind + br label %return + +next2: ; preds = %next + call void @_Z7cleanupv() + %typeid = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIi to i8*)) +; CHECK-NOT: call i32 @llvm.eh.typeid.for + %4 = icmp eq i32 %filter3.i, %typeid + br i1 %4, label %ppad3, label %next3 + +next3: ; preds = %next2 + %typeid1 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIb to i8*)) + %5 = icmp eq i32 %filter3.i, %typeid1 + br i1 %5, label %ppad4, label %unwind + +unwind: ; preds = %next3 + resume { i8*, i32 } %0 + +ppad3: ; preds = %next2 + %6 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr2.i) nounwind + tail call void @__cxa_end_catch() nounwind + br label %return + +ppad4: ; preds = %next3 + %D.2080_5 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr2.i) nounwind + tail call void @__cxa_end_catch() nounwind + br label %return + +return: ; preds = %ppad4, %ppad3, %ppad2, %ppad, %entry + ret void +} diff --git a/llvm/test/Transforms/NewGVN/2012-05-22-PreCrash.ll b/llvm/test/Transforms/NewGVN/2012-05-22-PreCrash.ll new file mode 100644 index 0000000..ba3cec8 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2012-05-22-PreCrash.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -newgvn +; PR12858 + +define void @fn5(i16 signext %p1, i8 signext %p2) nounwind uwtable { +entry: + br i1 undef, label %if.else, label %if.then + +if.then: ; preds = %entry + br label %if.end + +if.else: ; preds = %entry + %conv = sext i16 %p1 to i32 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %conv1 = sext i16 %p1 to i32 + br i1 undef, label %if.then3, label %if.else4 + +if.then3: ; preds = %if.end + br label %if.end12 + +if.else4: ; preds = %if.end + %conv7 = sext i8 %p2 to i32 + %cmp8 = icmp eq i32 %conv1, %conv7 + br i1 %cmp8, label %if.then10, label %if.end12 + +if.then10: ; preds = %if.else4 + br label %if.end12 + +if.end12: ; preds = %if.then10, %if.else4, %if.then3 + %conv13 = sext i8 %p2 to i32 + ret void +} diff --git a/llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll b/llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll new file mode 100644 index 0000000..a3511c3 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll @@ -0,0 +1,43 @@ +; XFAIL: * +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> ) +declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) + +; This test ensures that masked scatter and gather operations, which take vectors of pointers, +; do not have pointer aliasing ignored when being processed. 
+; No scatter/gather calls should end up eliminated +; CHECK: llvm.masked.gather +; CHECK: llvm.masked.gather +; CHECK: llvm.masked.scatter +; CHECK: llvm.masked.gather +; CHECK: llvm.masked.scatter +; CHECK: llvm.masked.gather +define spir_kernel void @test(<2 x i32*> %in1, <2 x i32*> %in2, i32* %out) { +entry: + ; Just some temporary storage + %tmp.0 = alloca i32 + %tmp.1 = alloca i32 + %tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0 + %tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1 + ; Read from in1 and in2 + %in1.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in1, i32 1, <2 x i1> , <2 x i32> undef) #1 + %in2.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in2, i32 1, <2 x i1> , <2 x i32> undef) #1 + ; Store in1 to the allocas + call void @llvm.masked.scatter.v2i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> ); + ; Read in1 from the allocas + ; This gather should alias the scatter we just saw + %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1 + ; Store in2 to the allocas + call void @llvm.masked.scatter.v2i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> ); + ; Read in2 from the allocas + ; This gather should alias the scatter we just saw, and not be eliminated + %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1 + ; Store in2 to out for good measure + %tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0 + %tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1 + store i32 %tmp.v.1.0, i32* %out + %out.1 = getelementptr i32, i32* %out, i32 1 + store i32 %tmp.v.1.1, i32* %out.1 + ret void +} diff --git a/llvm/test/Transforms/NewGVN/MemdepMiscompile.ll b/llvm/test/Transforms/NewGVN/MemdepMiscompile.ll new file mode 100644 index 0000000..559882c --- /dev/null +++ b/llvm/test/Transforms/NewGVN/MemdepMiscompile.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.7.0" + +; rdar://12801584 +; Value of %shouldExit can be changed by RunInMode. +; Make sure we do not replace load %shouldExit in while.cond.backedge +; with a phi node where the value from while.body is 0. 
+define i32 @test() nounwind ssp { +entry: +; CHECK: test() +; CHECK: while.body: +; CHECK: call void @RunInMode +; CHECK: br i1 %tobool, label %while.cond.backedge, label %if.then +; CHECK: while.cond.backedge: +; CHECK: load i32, i32* %shouldExit +; CHECK: br i1 %cmp, label %while.body + %shouldExit = alloca i32, align 4 + %tasksIdle = alloca i32, align 4 + store i32 0, i32* %shouldExit, align 4 + store i32 0, i32* %tasksIdle, align 4 + call void @CTestInitialize(i32* %tasksIdle) nounwind + %0 = load i32, i32* %shouldExit, align 4 + %cmp1 = icmp eq i32 %0, 0 + br i1 %cmp1, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: + br label %while.body + +while.body: + call void @RunInMode(i32 100) nounwind + %1 = load i32, i32* %tasksIdle, align 4 + %tobool = icmp eq i32 %1, 0 + br i1 %tobool, label %while.cond.backedge, label %if.then + +if.then: + store i32 0, i32* %tasksIdle, align 4 + call void @TimerCreate(i32* %shouldExit) nounwind + br label %while.cond.backedge + +while.cond.backedge: + %2 = load i32, i32* %shouldExit, align 4 + %cmp = icmp eq i32 %2, 0 + br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge + +while.cond.while.end_crit_edge: + br label %while.end + +while.end: + ret i32 0 +} +declare void @CTestInitialize(i32*) +declare void @RunInMode(i32) +declare void @TimerCreate(i32*) diff --git a/llvm/test/Transforms/NewGVN/assume-equal.ll b/llvm/test/Transforms/NewGVN/assume-equal.ll new file mode 100644 index 0000000..b6c2a7a --- /dev/null +++ b/llvm/test/Transforms/NewGVN/assume-equal.ll @@ -0,0 +1,276 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s + +%struct.A = type { i32 (...)** } +@_ZTV1A = available_externally unnamed_addr constant [4 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3barEv to i8*)], align 8 +@_ZTI1A = external constant i8* + +; Checks if indirect calls can be replaced with direct +; assuming that %vtable == @_ZTV1A (with alignment). 
+; Checking const propagation across other BBs +; CHECK-LABEL: define void @_Z1gb( + +define void @_Z1gb(i1 zeroext %p) { +entry: + %call = tail call noalias i8* @_Znwm(i64 8) #4 + %0 = bitcast i8* %call to %struct.A* + tail call void @_ZN1AC1Ev(%struct.A* %0) #1 + %1 = bitcast i8* %call to i8*** + %vtable = load i8**, i8*** %1, align 8 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2) + tail call void @llvm.assume(i1 %cmp.vtables) + br i1 %p, label %if.then, label %if.else + +if.then: ; preds = %entry + %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)** + %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8 + + ; CHECK: call i32 @_ZN1A3fooEv( + %call2 = tail call i32 %2(%struct.A* %0) #1 + + br label %if.end + +if.else: ; preds = %entry + %vfn47 = getelementptr inbounds i8*, i8** %vtable, i64 1 + %vfn4 = bitcast i8** %vfn47 to i32 (%struct.A*)** + + ; CHECK: call i32 @_ZN1A3barEv( + %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn4, align 8 + + %call5 = tail call i32 %3(%struct.A* %0) #1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +; Check integration with invariant.group handling +; CHECK-LABEL: define void @invariantGroupHandling(i1 zeroext %p) { +define void @invariantGroupHandling(i1 zeroext %p) { +entry: + %call = tail call noalias i8* @_Znwm(i64 8) #4 + %0 = bitcast i8* %call to %struct.A* + tail call void @_ZN1AC1Ev(%struct.A* %0) #1 + %1 = bitcast i8* %call to i8*** + %vtable = load i8**, i8*** %1, align 8, !invariant.group !0 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2) + tail call void @llvm.assume(i1 %cmp.vtables) + br i1 %p, label %if.then, label %if.else + +if.then: ; preds = %entry + %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)** + %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8 + +; CHECK: call i32 @_ZN1A3fooEv( + %call2 = tail call i32 %2(%struct.A* %0) #1 + %vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0 + %vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)** + %call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8 +; FIXME: those loads could be also direct, but right now the invariant.group +; analysis works only on single block +; CHECK-NOT: call i32 @_ZN1A3fooEv( + %callx = tail call i32 %call1(%struct.A* %0) #1 + + %vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0 + %vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)** + %call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8 +; CHECK-NOT: call i32 @_ZN1A3fooEv( + %cally = tail call i32 %call4(%struct.A* %0) #1 + + %b = bitcast i8* %call to %struct.A** + %vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0 + %vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)** + %vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8 +; CHECK-NOT: call i32 @_ZN1A3fooEv( + %unknown = tail call i32 %vfun(%struct.A* %0) #1 + + br label %if.end + +if.else: ; preds = %entry + %vfn47 = getelementptr inbounds i8*, i8** %vtable, i64 1 + %vfn4 = bitcast i8** %vfn47 to i32 (%struct.A*)** + + ; CHECK: call i32 @_ZN1A3barEv( + %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn4, align 8 + + %call5 = tail call i32 %3(%struct.A* %0) #1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + + +; Checking const propagation in the same BB +; CHECK-LABEL: define i32 @main() + +define i32 
@main() {
+entry:
+  %call = tail call noalias i8* @_Znwm(i64 8)
+  %0 = bitcast i8* %call to %struct.A*
+  tail call void @_ZN1AC1Ev(%struct.A* %0)
+  %1 = bitcast i8* %call to i8***
+  %vtable = load i8**, i8*** %1, align 8
+  %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2)
+  tail call void @llvm.assume(i1 %cmp.vtables)
+  %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)**
+
+  ; CHECK: call i32 @_ZN1A3fooEv(
+  %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
+
+  %call2 = tail call i32 %2(%struct.A* %0)
+  ret i32 0
+}
+
+; This test checks const propagation with the fcmp instruction.
+; CHECK-LABEL: define float @_Z1gf(float %p)
+
+define float @_Z1gf(float %p) {
+entry:
+  %p.addr = alloca float, align 4
+  %f = alloca float, align 4
+  store float %p, float* %p.addr, align 4
+
+  store float 3.000000e+00, float* %f, align 4
+  %0 = load float, float* %p.addr, align 4
+  %1 = load float, float* %f, align 4
+  %cmp = fcmp oeq float %1, %0 ; note const on lhs
+  call void @llvm.assume(i1 %cmp)
+
+  ; CHECK: ret float 3.000000e+00
+  ret float %0
+}
+
+; CHECK-LABEL: define float @_Z1hf(float %p)
+
+define float @_Z1hf(float %p) {
+entry:
+  %p.addr = alloca float, align 4
+  store float %p, float* %p.addr, align 4
+
+  %0 = load float, float* %p.addr, align 4
+  %cmp = fcmp nnan ueq float %0, 3.000000e+00
+  call void @llvm.assume(i1 %cmp)
+
+  ; CHECK: ret float 3.000000e+00
+  ret float %0
+}
+
+; CHECK-LABEL: define float @_Z1if(float %p)
+define float @_Z1if(float %p) {
+entry:
+  %p.addr = alloca float, align 4
+  store float %p, float* %p.addr, align 4
+
+  %0 = load float, float* %p.addr, align 4
+  %cmp = fcmp ueq float %0, 3.000000e+00 ; no nnan flag - can't propagate
+  call void @llvm.assume(i1 %cmp)
+
+  ; CHECK-NOT: ret float 3.000000e+00
+  ret float %0
+}
+
+; This test checks if constant propagation works for multiple node edges
+; CHECK-LABEL: define i32 @_Z1ii(i32 %p)
+define i32 @_Z1ii(i32 %p) {
+entry:
+  %cmp = icmp eq i32 %p, 42
+  call void @llvm.assume(i1 %cmp)
+
+  ; CHECK: br i1 true, label %bb2, label %bb2
+  br i1 %cmp, label %bb2, label %bb2
+bb2:
+  call void @llvm.assume(i1 true)
+  ; CHECK: br i1 true, label %bb2, label %bb2
+  br i1 %cmp, label %bb2, label %bb2
+
+  ; CHECK: ret i32 42
+  ret i32 %p
+}
+
+; CHECK-LABEL: define i32 @_Z1ij(i32 %p)
+define i32 @_Z1ij(i32 %p) {
+entry:
+  %cmp = icmp eq i32 %p, 42
+  call void @llvm.assume(i1 %cmp)
+
+  ; CHECK: br i1 true, label %bb2, label %bb2
+  br i1 %cmp, label %bb2, label %bb2
+bb2:
+  ; CHECK-NOT: %cmp2 =
+  %cmp2 = icmp eq i32 %p, 42
+  ; CHECK-NOT: call void @llvm.assume(
+  call void @llvm.assume(i1 %cmp2)
+
+  ; CHECK: br i1 true, label %bb2, label %bb2
+  br i1 %cmp, label %bb2, label %bb2
+
+  ; CHECK: ret i32 42
+  ret i32 %p
+}
+
+; CHECK-LABEL: define i32 @_Z1ik(i32 %p)
+define i32 @_Z1ik(i32 %p) {
+entry:
+  %cmp = icmp eq i32 %p, 42
+  call void @llvm.assume(i1 %cmp)
+
+  ; CHECK: br i1 true, label %bb2, label %bb3
+  br i1 %cmp, label %bb2, label %bb3
+bb2:
+  ; CHECK-NOT: %cmp3 =
+  %cmp3 = icmp eq i32 %p, 43
+  ; CHECK: store i8 undef, i8* null
+  call void @llvm.assume(i1 %cmp3)
+  ret i32 15
+bb3:
+  ret i32 17
+}
+
+; This test checks if GVN can do the constant propagation correctly
+; when there are multiple uses of the same assume value in the
+; basic block that has a loop back-edge pointing to itself.
+; +; CHECK-LABEL: define i32 @_Z1il(i32 %val, i1 %k) +define i32 @_Z1il(i32 %val, i1 %k) { + br label %next + +next: +; CHECK: tail call void @llvm.assume(i1 %k) +; CHECK-NEXT: %cmp = icmp eq i32 %val, 50 + tail call void @llvm.assume(i1 %k) + tail call void @llvm.assume(i1 %k) + %cmp = icmp eq i32 %val, 50 + br i1 %cmp, label %next, label %meh + +meh: + ret i32 0 +} + +; This test checks if GVN can prevent the constant propagation correctly +; in the successor blocks that are not dominated by the basic block +; with the assume instruction. +; +; CHECK-LABEL: define i1 @_z1im(i32 %val, i1 %k, i1 %j) +define i1 @_z1im(i32 %val, i1 %k, i1 %j) { + br i1 %j, label %next, label %meh + +next: +; CHECK: tail call void @llvm.assume(i1 %k) +; CHECK-NEXT: br label %meh + tail call void @llvm.assume(i1 %k) + tail call void @llvm.assume(i1 %k) + br label %meh + +meh: +; CHECK: ret i1 %k + ret i1 %k +} + +declare noalias i8* @_Znwm(i64) +declare void @_ZN1AC1Ev(%struct.A*) +declare void @llvm.assume(i1) +declare i32 @_ZN1A3fooEv(%struct.A*) +declare i32 @_ZN1A3barEv(%struct.A*) + +!0 = !{!"struct A"} diff --git a/llvm/test/Transforms/NewGVN/basic-undef-test.ll b/llvm/test/Transforms/NewGVN/basic-undef-test.ll new file mode 100644 index 0000000..681e77b --- /dev/null +++ b/llvm/test/Transforms/NewGVN/basic-undef-test.ll @@ -0,0 +1,15 @@ +; RUN: opt -basicaa -newgvn -S < %s | FileCheck %s +; ModuleID = 'test3.ll' +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define i32 @main(i32 *%foo) { +entry: +; CHECK: load i32, i32* %foo, align 4 + %0 = load i32, i32* %foo, align 4 + store i32 5, i32* undef, align 4 +; CHECK-NOT: load i32, i32* %foo, align 4 + %1 = load i32, i32* %foo, align 4 +; CHECK: add i32 %0, %0 + %2 = add i32 %0, %1 + ret i32 %2 +} diff --git a/llvm/test/Transforms/NewGVN/basic.ll b/llvm/test/Transforms/NewGVN/basic.ll new file mode 100644 index 0000000..40c83a1 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/basic.ll @@ -0,0 +1,17 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s +; RUN: opt < %s -passes=gvn -S | FileCheck %s + +define i32 @main() { +block1: + %z1 = bitcast i32 0 to i32 + br label %block2 +block2: + %z2 = bitcast i32 0 to i32 + ret i32 %z2 +} + +; CHECK: define i32 @main() { +; CHECK-NEXT: block1: +; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: } diff --git a/llvm/test/Transforms/NewGVN/big-endian.ll b/llvm/test/Transforms/NewGVN/big-endian.ll new file mode 100644 index 0000000..46b336b --- /dev/null +++ b/llvm/test/Transforms/NewGVN/big-endian.ll @@ -0,0 +1,40 @@ +; RUN: opt -newgvn -S < %s | FileCheck %s + +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +;; Make sure we use correct bit shift based on storage size for +;; loads reusing a load value. 
+define i64 @test1({ i1, i8 }* %predA, { i1, i8 }* %predB) { +; CHECK-LABEL: @test1 +; CHECK-NOT: [[V1:%.*]] = load i16, i16* %{{.*}} +; CHECK-NOT: [[V2:%.*]] = lshr i16 [[V1]], 8 +; CHECK-NOT: trunc i16 [[V2]] to i1 + + %valueLoadA.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 0 + %valueLoadA.fca.0.load = load i1, i1* %valueLoadA.fca.0.gep, align 8 + %valueLoadB.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predB, i64 0, i32 0 + %valueLoadB.fca.0.load = load i1, i1* %valueLoadB.fca.0.gep, align 8 + %isTrue = and i1 %valueLoadA.fca.0.load, %valueLoadB.fca.0.load + %valueLoadA.fca.1.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 1 + %valueLoadA.fca.1.load = load i8, i8* %valueLoadA.fca.1.gep, align 1 + %isNotNullA = icmp ne i8 %valueLoadA.fca.1.load, 0 + %valueLoadB.fca.1.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predB, i64 0, i32 1 + %valueLoadB.fca.1.load = load i8, i8* %valueLoadB.fca.1.gep, align 1 + %isNotNullB = icmp ne i8 %valueLoadB.fca.1.load, 0 + %isNotNull = and i1 %isNotNullA, %isNotNullB + %isTrueAndNotNull = and i1 %isTrue, %isNotNull + %ret = zext i1 %isTrueAndNotNull to i64 + ret i64 %ret +} + +;; And likewise for loads reusing a store value. +define i1 @test2(i8 %V, i8* %P) { +; CHECK-LABEL: @test2 +; CHECK-NOT: lshr + store i8 %V, i8* %P + %P2 = bitcast i8* %P to i1* + %A = load i1, i1* %P2 + ret i1 %A +} + diff --git a/llvm/test/Transforms/NewGVN/bitcast-of-call.ll b/llvm/test/Transforms/NewGVN/bitcast-of-call.ll new file mode 100644 index 0000000..7b25038 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/bitcast-of-call.ll @@ -0,0 +1,14 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s +; PR2213 + +define i32* @f(i8* %x) { +entry: + %tmp = call i8* @m( i32 12 ) ; [#uses=2] + %tmp1 = bitcast i8* %tmp to i32* ; [#uses=0] + %tmp2 = bitcast i8* %tmp to i32* ; [#uses=0] +; CHECK-NOT: %tmp2 + ret i32* %tmp2 +} + +declare i8* @m(i32) diff --git a/llvm/test/Transforms/NewGVN/br-identical.ll b/llvm/test/Transforms/NewGVN/br-identical.ll new file mode 100644 index 0000000..672887c --- /dev/null +++ b/llvm/test/Transforms/NewGVN/br-identical.ll @@ -0,0 +1,38 @@ +; RUN: opt -newgvn -S -o - %s | FileCheck %s + +; If a branch has two identical successors, we cannot declare either dead. + +define void @widget(i1 %p) { +entry: + br label %bb2 + +bb2: + %t1 = phi i64 [ 0, %entry ], [ %t5, %bb7 ] + %t2 = add i64 %t1, 1 + %t3 = icmp ult i64 0, %t2 + br i1 %t3, label %bb3, label %bb4 + +bb3: + %t4 = call i64 @f() + br label %bb4 + +bb4: + ; CHECK-NOT: phi {{.*}} undef + %foo = phi i64 [ %t4, %bb3 ], [ 0, %bb2 ] + br i1 %p, label %bb5, label %bb6 + +bb5: + br i1 true, label %bb7, label %bb7 + +bb6: + br i1 true, label %bb7, label %bb7 + +bb7: + %t5 = add i64 %t1, 1 + br i1 %p, label %bb2, label %bb8 + +bb8: + ret void +} + +declare i64 @f() diff --git a/llvm/test/Transforms/NewGVN/calloc-load-removal.ll b/llvm/test/Transforms/NewGVN/calloc-load-removal.ll new file mode 100644 index 0000000..e687044 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/calloc-load-removal.ll @@ -0,0 +1,26 @@ +; XFAIL: * +; RUN: opt -S -basicaa -newgvn < %s | FileCheck %s +; RUN: opt -S -basicaa -newgvn -disable-simplify-libcalls < %s | FileCheck %s -check-prefix=CHECK_NO_LIBCALLS +; Check that loads from calloc are recognized as being zero. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define i32 @test1() { + %1 = tail call noalias i8* @calloc(i64 1, i64 4) + %2 = bitcast i8* %1 to i32* + ; This load is trivially constant zero + %3 = load i32, i32* %2, align 4 + ret i32 %3 + +; CHECK-LABEL: @test1( +; CHECK-NOT: %3 = load i32, i32* %2, align 4 +; CHECK: ret i32 0 + +; CHECK_NO_LIBCALLS-LABEL: @test1( +; CHECK_NO_LIBCALLS: load +; CHECK_NO_LIBCALLS: ret i32 % + +} + +declare noalias i8* @calloc(i64, i64) diff --git a/llvm/test/Transforms/NewGVN/calls-nonlocal.ll b/llvm/test/Transforms/NewGVN/calls-nonlocal.ll new file mode 100644 index 0000000..292060d --- /dev/null +++ b/llvm/test/Transforms/NewGVN/calls-nonlocal.ll @@ -0,0 +1,76 @@ +; XFAIL: * +; Two occurrences of strlen should be zapped. +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9" + +define i32 @test(i32 %g, i8* %P) nounwind { +entry: + %tmp2 = call i32 @strlen( i8* %P ) nounwind readonly ; [#uses=1] + %tmp3 = icmp eq i32 %tmp2, 100 ; [#uses=1] + %tmp34 = zext i1 %tmp3 to i8 ; [#uses=1] + %toBool = icmp ne i8 %tmp34, 0 ; [#uses=1] + br i1 %toBool, label %bb, label %bb6 + +bb: ; preds = %entry + br label %bb27 + +bb6: ; preds = %entry + %tmp8 = add i32 %g, 42 ; [#uses=2] + %tmp10 = call i32 @strlen( i8* %P ) nounwind readonly ; [#uses=1] + %tmp11 = icmp eq i32 %tmp10, 100 ; [#uses=1] + %tmp1112 = zext i1 %tmp11 to i8 ; [#uses=1] + %toBool13 = icmp ne i8 %tmp1112, 0 ; [#uses=1] + br i1 %toBool13, label %bb14, label %bb16 + +bb14: ; preds = %bb6 + br label %bb27 + +bb16: ; preds = %bb6 + %tmp18 = mul i32 %tmp8, 2 ; [#uses=1] + %tmp20 = call i32 @strlen( i8* %P ) nounwind readonly ; [#uses=1] + %tmp21 = icmp eq i32 %tmp20, 100 ; [#uses=1] + %tmp2122 = zext i1 %tmp21 to i8 ; [#uses=1] + %toBool23 = icmp ne i8 %tmp2122, 0 ; [#uses=1] + br i1 %toBool23, label %bb24, label %bb26 + +bb24: ; preds = %bb16 + br label %bb27 + +bb26: ; preds = %bb16 + br label %bb27 + +bb27: ; preds = %bb26, %bb24, %bb14, %bb + %tmp.0 = phi i32 [ 11, %bb26 ], [ %tmp18, %bb24 ], [ %tmp8, %bb14 ], [ %g, %bb ] ; [#uses=1] + br label %return + +return: ; preds = %bb27 + ret i32 %tmp.0 +} + +; CHECK: define i32 @test(i32 %g, i8* %P) #0 { +; CHECK: entry: +; CHECK: %tmp2 = call i32 @strlen(i8* %P) #1 +; CHECK: %tmp3 = icmp eq i32 %tmp2, 100 +; CHECK: %tmp34 = zext i1 %tmp3 to i8 +; CHECK: br i1 %tmp3, label %bb, label %bb6 +; CHECK: bb: +; CHECK: br label %bb27 +; CHECK: bb6: +; CHECK: %tmp8 = add i32 %g, 42 +; CHECK: br i1 false, label %bb14, label %bb16 +; CHECK: bb14: +; CHECK: br label %bb27 +; CHECK: bb16: +; CHECK: %tmp18 = mul i32 %tmp8, 2 +; CHECK: br i1 false, label %bb24, label %bb26 +; CHECK: bb24: +; CHECK: br label %bb27 +; CHECK: bb26: +; CHECK: br label %bb27 +; CHECK: bb27: +; CHECK: %tmp.0 = phi i32 [ 11, %bb26 ], [ undef, %bb24 ], [ undef, %bb14 ], [ %g, %bb ] +; CHECK: ret i32 %tmp.0 +; CHECK: } + +declare i32 @strlen(i8*) nounwind readonly diff --git a/llvm/test/Transforms/NewGVN/calls-readonly.ll b/llvm/test/Transforms/NewGVN/calls-readonly.ll new file mode 100644 index 0000000..8bc3bf2 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/calls-readonly.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +; Should delete the 
second call to strlen even though the intervening strchr call exists. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + +define i8* @test(i8* %P, i8* %Q, i32 %x, i32 %y) nounwind readonly { +entry: + %0 = tail call i32 @strlen(i8* %P) ; [#uses=2] + %1 = icmp eq i32 %0, 0 ; [#uses=1] + br i1 %1, label %bb, label %bb1 + +bb: ; preds = %entry + %2 = sdiv i32 %x, %y ; [#uses=1] + br label %bb1 + +bb1: ; preds = %bb, %entry + %x_addr.0 = phi i32 [ %2, %bb ], [ %x, %entry ] ; [#uses=1] + %3 = tail call i8* @strchr(i8* %Q, i32 97) ; [#uses=1] + %4 = tail call i32 @strlen(i8* %P) ; [#uses=1] + %5 = add i32 %x_addr.0, %0 ; [#uses=1] + %.sum = sub i32 %5, %4 ; [#uses=1] + %6 = getelementptr i8, i8* %3, i32 %.sum ; [#uses=1] + ret i8* %6 +} + +; CHECK: define i8* @test(i8* %P, i8* %Q, i32 %x, i32 %y) #0 { +; CHECK: entry: +; CHECK-NEXT: %0 = tail call i32 @strlen(i8* %P) +; CHECK-NEXT: %1 = icmp eq i32 %0, 0 +; CHECK-NEXT: br i1 %1, label %bb, label %bb1 +; CHECK: bb: +; CHECK-NEXT: %2 = sdiv i32 %x, %y +; CHECK-NEXT: br label %bb1 +; CHECK: bb1: +; CHECK-NEXT: %x_addr.0 = phi i32 [ %2, %bb ], [ %x, %entry ] +; CHECK-NEXT: %3 = tail call i8* @strchr(i8* %Q, i32 97) +; CHECK-NEXT: %4 = add i32 %x_addr.0, %0 +; CHECK-NEXT: %5 = getelementptr i8, i8* %3, i32 %x_addr.0 +; CHECK-NEXT: ret i8* %5 +; CHECK: } + +declare i32 @strlen(i8*) nounwind readonly + +declare i8* @strchr(i8*, i32) nounwind readonly diff --git a/llvm/test/Transforms/NewGVN/commute.ll b/llvm/test/Transforms/NewGVN/commute.ll new file mode 100644 index 0000000..ab7541b --- /dev/null +++ b/llvm/test/Transforms/NewGVN/commute.ll @@ -0,0 +1,23 @@ +; RUN: opt -newgvn -S < %s | FileCheck %s + +declare void @use(i32, i32) + +define void @foo(i32 %x, i32 %y) { + ; CHECK-LABEL: @foo( + %add1 = add i32 %x, %y + %add2 = add i32 %y, %x + call void @use(i32 %add1, i32 %add2) + ; CHECK: @use(i32 %add1, i32 %add1) + ret void +} + +declare void @vse(i1, i1) + +define void @bar(i32 %x, i32 %y) { + ; CHECK-LABEL: @bar( + %cmp1 = icmp ult i32 %x, %y + %cmp2 = icmp ugt i32 %y, %x + call void @vse(i1 %cmp1, i1 %cmp2) + ; CHECK: @vse(i1 %cmp1, i1 %cmp1) + ret void +} diff --git a/llvm/test/Transforms/NewGVN/cond_br.ll b/llvm/test/Transforms/NewGVN/cond_br.ll new file mode 100644 index 0000000..a2584f3 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/cond_br.ll @@ -0,0 +1,55 @@ +; RUN: opt -basicaa -newgvn -S < %s | FileCheck %s +@y = external global i32 +@z = external global i32 + +; Function Attrs: nounwind ssp uwtable +define void @foo(i32 %x) { +; CHECK: @foo(i32 %x) +; CHECK: %.pre = load i32, i32* @y +; CHECK: call void @bar(i32 %.pre) + + %t = sub i32 %x, %x + %.pre = load i32, i32* @y, align 4 + %cmp = icmp sgt i32 %t, 2 + br i1 %cmp, label %if.then, label %entry.if.end_crit_edge + +entry.if.end_crit_edge: ; preds = %entry + br label %if.end + +if.then: ; preds = %entry + %add = add nsw i32 %x, 3 + store i32 %add, i32* @y, align 4 + br label %if.end + +if.end: ; preds = %entry.if.end_crit_edge, %if.then + %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %add, %if.then ] + tail call void @bar(i32 %1) + ret void +} + +define void @foo2(i32 %x) { +; CHECK: @foo2(i32 %x) +; CHECK: %.pre = load i32, i32* @y +; CHECK: tail call void @bar(i32 %.pre) +entry: + %t = sub i32 %x, %x + %.pre = load i32, i32* @y, align 4 + %cmp = icmp sgt i32 %t, 2 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %add = 
add nsw i32 %x, 3 + store i32 %add, i32* @y, align 4 + br label %if.end + +if.else: ; preds = %entry + store i32 1, i32* @z, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %0 = phi i32 [ %.pre, %if.else ], [ %add, %if.then ] + tail call void @bar(i32 %0) + ret void +} + +declare void @bar(i32) diff --git a/llvm/test/Transforms/NewGVN/cond_br2.ll b/llvm/test/Transforms/NewGVN/cond_br2.ll new file mode 100644 index 0000000..e511ff7 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/cond_br2.ll @@ -0,0 +1,141 @@ +; XFAIL: * +; RUN: opt -basicaa -newgvn -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl", [1 x %"union.llvm::SmallVectorBase::U"] } +%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase" } +%"class.llvm::SmallVectorTemplateBase" = type { %"class.llvm::SmallVectorTemplateCommon" } +%"class.llvm::SmallVectorTemplateCommon" = type { %"class.llvm::SmallVectorBase" } +%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8*, %"union.llvm::SmallVectorBase::U" } +%"union.llvm::SmallVectorBase::U" = type { x86_fp80 } + +; Function Attrs: ssp uwtable +define void @_Z4testv() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK: @_Z4testv() +; CHECK: invoke.cont: +; CHECK: br i1 true, label %new.notnull.i11, label %if.end.i14 +; CHECK: Retry.i10: + +entry: + %sv = alloca %"class.llvm::SmallVector", align 16 + %0 = bitcast %"class.llvm::SmallVector"* %sv to i8* + call void @llvm.lifetime.start(i64 64, i8* %0) #1 + %BeginX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0 + %FirstEl.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 3 + %1 = bitcast %"union.llvm::SmallVectorBase::U"* %FirstEl.i.i.i.i.i.i to i8* + store i8* %1, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4 + %EndX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1 + store i8* %1, i8** %EndX.i.i.i.i.i.i, align 8, !tbaa !4 + %CapacityX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2 + %add.ptr.i.i.i.i2.i.i = getelementptr inbounds %"union.llvm::SmallVectorBase::U", %"union.llvm::SmallVectorBase::U"* %FirstEl.i.i.i.i.i.i, i64 2 + %add.ptr.i.i.i.i.i.i = bitcast %"union.llvm::SmallVectorBase::U"* %add.ptr.i.i.i.i2.i.i to i8* + store i8* %add.ptr.i.i.i.i.i.i, i8** %CapacityX.i.i.i.i.i.i, align 16, !tbaa !4 + %EndX.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1 + %2 = load i8*, i8** %EndX.i, align 8, !tbaa !4 + %CapacityX.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2 + %cmp.i = icmp ult i8* %2, %add.ptr.i.i.i.i.i.i + br i1 %cmp.i, label %Retry.i, label %if.end.i + +Retry.i: ; preds = %.noexc, %entry + %3 = phi i8* [ %2, %entry ], [ %.pre.i, %.noexc ] + %new.isnull.i = icmp eq i8* %3, null + br i1 %new.isnull.i, label %invoke.cont, label %new.notnull.i + +new.notnull.i: ; preds = %Retry.i + %4 = bitcast i8* %3 to i32* + store i32 1, i32* %4, align 4, 
!tbaa !5 + br label %invoke.cont + +if.end.i: ; preds = %entry + %5 = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0 + invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"* %5, i64 0, i64 4) + to label %.noexc unwind label %lpad + +.noexc: ; preds = %if.end.i + %.pre.i = load i8*, i8** %EndX.i, align 8, !tbaa !4 + br label %Retry.i + +invoke.cont: ; preds = %new.notnull.i, %Retry.i + %add.ptr.i = getelementptr inbounds i8, i8* %3, i64 4 + store i8* %add.ptr.i, i8** %EndX.i, align 8, !tbaa !4 + %6 = load i8*, i8** %CapacityX.i, align 16, !tbaa !4 + %cmp.i8 = icmp ult i8* %add.ptr.i, %6 + br i1 %cmp.i8, label %new.notnull.i11, label %if.end.i14 + +Retry.i10: ; preds = %if.end.i14 + %.pre.i13 = load i8*, i8** %EndX.i, align 8, !tbaa !4 + %new.isnull.i9 = icmp eq i8* %.pre.i13, null + br i1 %new.isnull.i9, label %invoke.cont2, label %new.notnull.i11 + +new.notnull.i11: ; preds = %invoke.cont, %Retry.i10 + %7 = phi i8* [ %.pre.i13, %Retry.i10 ], [ %add.ptr.i, %invoke.cont ] + %8 = bitcast i8* %7 to i32* + store i32 2, i32* %8, align 4, !tbaa !5 + br label %invoke.cont2 + +if.end.i14: ; preds = %invoke.cont + %9 = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0 + invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"* %9, i64 0, i64 4) + to label %Retry.i10 unwind label %lpad + +invoke.cont2: ; preds = %new.notnull.i11, %Retry.i10 + %10 = phi i8* [ null, %Retry.i10 ], [ %7, %new.notnull.i11 ] + %add.ptr.i12 = getelementptr inbounds i8, i8* %10, i64 4 + store i8* %add.ptr.i12, i8** %EndX.i, align 8, !tbaa !4 + invoke void @_Z1gRN4llvm11SmallVectorIiLj8EEE(%"class.llvm::SmallVector"* %sv) + to label %invoke.cont3 unwind label %lpad + +invoke.cont3: ; preds = %invoke.cont2 + %11 = load i8*, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4 + %cmp.i.i.i.i19 = icmp eq i8* %11, %1 + br i1 %cmp.i.i.i.i19, label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21, label %if.then.i.i.i20 + +if.then.i.i.i20: ; preds = %invoke.cont3 + call void @free(i8* %11) #1 + br label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21 + +_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21: ; preds = %invoke.cont3, %if.then.i.i.i20 + call void @llvm.lifetime.end(i64 64, i8* %0) #1 + ret void + +lpad: ; preds = %if.end.i14, %if.end.i, %invoke.cont2 + %12 = landingpad { i8*, i32 } + cleanup + %13 = load i8*, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4 + %cmp.i.i.i.i = icmp eq i8* %13, %1 + br i1 %cmp.i.i.i.i, label %eh.resume, label %if.then.i.i.i + +if.then.i.i.i: ; preds = %lpad + call void @free(i8* %13) #1 + br label %eh.resume + +eh.resume: ; preds = %if.then.i.i.i, %lpad + resume { i8*, i32 } %12 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #1 + +declare i32 @__gxx_personality_v0(...) 
+ +declare void @_Z1gRN4llvm11SmallVectorIiLj8EEE(%"class.llvm::SmallVector"*) #2 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #1 + +declare void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"*, i64, i64) #2 + +; Function Attrs: nounwind +declare void @free(i8* nocapture) #3 + +attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } +attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!0 = !{!"any pointer", !1} +!1 = !{!"omnipotent char", !2} +!2 = !{!"Simple C/C++ TBAA"} +!3 = !{!"int", !1} +!4 = !{!0, !0, i64 0} +!5 = !{!3, !3, i64 0} diff --git a/llvm/test/Transforms/NewGVN/condprop.ll b/llvm/test/Transforms/NewGVN/condprop.ll new file mode 100644 index 0000000..898690d --- /dev/null +++ b/llvm/test/Transforms/NewGVN/condprop.ll @@ -0,0 +1,300 @@ +; XFAIL: * +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +@a = external global i32 ; [#uses=7] + +; CHECK-LABEL: @test1( +define i32 @test1() nounwind { +entry: + %0 = load i32, i32* @a, align 4 + %1 = icmp eq i32 %0, 4 + br i1 %1, label %bb, label %bb1 + +bb: ; preds = %entry + br label %bb8 + +bb1: ; preds = %entry + %2 = load i32, i32* @a, align 4 + %3 = icmp eq i32 %2, 5 + br i1 %3, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + br label %bb8 + +bb3: ; preds = %bb1 + %4 = load i32, i32* @a, align 4 + %5 = icmp eq i32 %4, 4 +; CHECK: br i1 false, label %bb4, label %bb5 + br i1 %5, label %bb4, label %bb5 + +bb4: ; preds = %bb3 + %6 = load i32, i32* @a, align 4 + %7 = add i32 %6, 5 + br label %bb8 + +bb5: ; preds = %bb3 + %8 = load i32, i32* @a, align 4 + %9 = icmp eq i32 %8, 5 +; CHECK: br i1 false, label %bb6, label %bb7 + br i1 %9, label %bb6, label %bb7 + +bb6: ; preds = %bb5 + %10 = load i32, i32* @a, align 4 + %11 = add i32 %10, 4 + br label %bb8 + +bb7: ; preds = %bb5 + %12 = load i32, i32* @a, align 4 + br label %bb8 + +bb8: ; preds = %bb7, %bb6, %bb4, %bb2, %bb + %.0 = phi i32 [ %12, %bb7 ], [ %11, %bb6 ], [ %7, %bb4 ], [ 4, %bb2 ], [ 5, %bb ] + br label %return + +return: ; preds = %bb8 + ret i32 %.0 +} + +declare void @foo(i1) +declare void @bar(i32) + +; CHECK-LABEL: @test3( +define void @test3(i32 %x, i32 %y) { + %xz = icmp eq i32 %x, 0 + %yz = icmp eq i32 %y, 0 + %z = and i1 %xz, %yz + br i1 %z, label %both_zero, label %nope +both_zero: + call void @foo(i1 %xz) +; CHECK: call void @foo(i1 true) + call void @foo(i1 %yz) +; CHECK: call void @foo(i1 true) + call void @bar(i32 %x) +; CHECK: call void @bar(i32 0) + call void @bar(i32 %y) +; CHECK: call void @bar(i32 0) + ret void +nope: + call void @foo(i1 %z) +; CHECK: call void @foo(i1 false) + ret void +} + +; CHECK-LABEL: @test4( +define void @test4(i1 %b, i32 %x) { + br i1 %b, label %sw, label %case3 +sw: + switch i32 %x, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case0 + i32 3, label %case3 + 
i32 4, label %default + ] +default: +; CHECK: default: + call void @bar(i32 %x) +; CHECK: call void @bar(i32 %x) + ret void +case0: +; CHECK: case0: + call void @bar(i32 %x) +; CHECK: call void @bar(i32 %x) + ret void +case1: +; CHECK: case1: + call void @bar(i32 %x) +; CHECK: call void @bar(i32 1) + ret void +case3: +; CHECK: case3: + call void @bar(i32 %x) +; CHECK: call void @bar(i32 %x) + ret void +} + +; CHECK-LABEL: @test5( +define i1 @test5(i32 %x, i32 %y) { + %cmp = icmp eq i32 %x, %y + br i1 %cmp, label %same, label %different + +same: + %cmp2 = icmp ne i32 %x, %y +; CHECK: ret i1 false + ret i1 %cmp2 + +different: + %cmp3 = icmp eq i32 %x, %y +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; CHECK-LABEL: @test6( +define i1 @test6(i32 %x, i32 %y) { + %cmp2 = icmp ne i32 %x, %y + %cmp = icmp eq i32 %x, %y + %cmp3 = icmp eq i32 %x, %y + br i1 %cmp, label %same, label %different + +same: +; CHECK: ret i1 false + ret i1 %cmp2 + +different: +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; CHECK-LABEL: @test6_fp( +define i1 @test6_fp(float %x, float %y) { + %cmp2 = fcmp une float %x, %y + %cmp = fcmp oeq float %x, %y + %cmp3 = fcmp oeq float %x, %y + br i1 %cmp, label %same, label %different + +same: +; CHECK: ret i1 false + ret i1 %cmp2 + +different: +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; CHECK-LABEL: @test7( +define i1 @test7(i32 %x, i32 %y) { + %cmp = icmp sgt i32 %x, %y + br i1 %cmp, label %same, label %different + +same: + %cmp2 = icmp sle i32 %x, %y +; CHECK: ret i1 false + ret i1 %cmp2 + +different: + %cmp3 = icmp sgt i32 %x, %y +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; CHECK-LABEL: @test7_fp( +define i1 @test7_fp(float %x, float %y) { + %cmp = fcmp ogt float %x, %y + br i1 %cmp, label %same, label %different + +same: + %cmp2 = fcmp ule float %x, %y +; CHECK: ret i1 false + ret i1 %cmp2 + +different: + %cmp3 = fcmp ogt float %x, %y +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; CHECK-LABEL: @test8( +define i1 @test8(i32 %x, i32 %y) { + %cmp2 = icmp sle i32 %x, %y + %cmp = icmp sgt i32 %x, %y + %cmp3 = icmp sgt i32 %x, %y + br i1 %cmp, label %same, label %different + +same: +; CHECK: ret i1 false + ret i1 %cmp2 + +different: +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; CHECK-LABEL: @test8_fp( +define i1 @test8_fp(float %x, float %y) { + %cmp2 = fcmp ule float %x, %y + %cmp = fcmp ogt float %x, %y + %cmp3 = fcmp ogt float %x, %y + br i1 %cmp, label %same, label %different + +same: +; CHECK: ret i1 false + ret i1 %cmp2 + +different: +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; PR1768 +; CHECK-LABEL: @test9( +define i32 @test9(i32 %i, i32 %j) { + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %cond_true, label %ret + +cond_true: + %diff = sub i32 %i, %j + ret i32 %diff +; CHECK: ret i32 0 + +ret: + ret i32 5 +; CHECK: ret i32 5 +} + +; PR1768 +; CHECK-LABEL: @test10( +define i32 @test10(i32 %j, i32 %i) { + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %cond_true, label %ret + +cond_true: + %diff = sub i32 %i, %j + ret i32 %diff +; CHECK: ret i32 0 + +ret: + ret i32 5 +; CHECK: ret i32 5 +} + +declare i32 @yogibar() + +; CHECK-LABEL: @test11( +define i32 @test11(i32 %x) { + %v0 = call i32 @yogibar() + %v1 = call i32 @yogibar() + %cmp = icmp eq i32 %v0, %v1 + br i1 %cmp, label %cond_true, label %next + +cond_true: + ret i32 %v1 +; CHECK: ret i32 %v0 + +next: + %cmp2 = icmp eq i32 %x, %v0 + br i1 %cmp2, label %cond_true2, label %next2 + +cond_true2: + ret i32 %v0 +; CHECK: ret i32 %x + +next2: + ret i32 0 +} + +; CHECK-LABEL: @test12( +define i32 @test12(i32 %x) { + %cmp = icmp eq 
i32 %x, 0 + br i1 %cmp, label %cond_true, label %cond_false + +cond_true: + br label %ret + +cond_false: + br label %ret + +ret: + %res = phi i32 [ %x, %cond_true ], [ %x, %cond_false ] +; CHECK: %res = phi i32 [ 0, %cond_true ], [ %x, %cond_false ] + ret i32 %res +} diff --git a/llvm/test/Transforms/NewGVN/crash-no-aa.ll b/llvm/test/Transforms/NewGVN/crash-no-aa.ll new file mode 100644 index 0000000..d511422 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/crash-no-aa.ll @@ -0,0 +1,15 @@ +; RUN: opt -disable-basicaa -newgvn -S < %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-freebsd8.0" + +; PR5744 +define i32 @test1({i16, i32} *%P) { + %P2 = getelementptr {i16, i32}, {i16, i32} *%P, i32 0, i32 0 + store i16 42, i16* %P2 + + %P3 = getelementptr {i16, i32}, {i16, i32} *%P, i32 0, i32 1 + %V = load i32, i32* %P3 + ret i32 %V +} + diff --git a/llvm/test/Transforms/NewGVN/crash.ll b/llvm/test/Transforms/NewGVN/crash.ll new file mode 100644 index 0000000..9fbe281 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/crash.ll @@ -0,0 +1,201 @@ +; RUN: opt -newgvn -disable-output < %s + +; PR5631 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0" + +define i32* @test1(i8* %name, i32 %namelen, i32* %o, i32 %expected_type) nounwind ssp { +entry: + br i1 undef, label %if.end13, label %while.body.preheader + + +if.end13: ; preds = %if.then6 + br label %while.body.preheader + +while.body.preheader: ; preds = %if.end13, %if.end + br label %while.body + +while.body: ; preds = %while.body.backedge, %while.body.preheader + %o.addr.0 = phi i32* [ undef, %while.body.preheader ], [ %o.addr.0.be, %while.body.backedge ] ; [#uses=2] + br i1 false, label %return.loopexit, label %lor.lhs.false + +lor.lhs.false: ; preds = %while.body + %tmp20 = bitcast i32* %o.addr.0 to i32* ; [#uses=1] + %tmp22 = load i32, i32* %tmp20 ; [#uses=0] + br i1 undef, label %land.lhs.true24, label %if.end31 + +land.lhs.true24: ; preds = %lor.lhs.false + %call28 = call i32* @parse_object(i8* undef) nounwind ; [#uses=0] + br i1 undef, label %return.loopexit, label %if.end31 + +if.end31: ; preds = %land.lhs.true24, %lor.lhs.false + br i1 undef, label %return.loopexit, label %if.end41 + +if.end41: ; preds = %if.end31 + %tmp43 = bitcast i32* %o.addr.0 to i32* ; [#uses=1] + %tmp45 = load i32, i32* %tmp43 ; [#uses=0] + br i1 undef, label %if.then50, label %if.else + +if.then50: ; preds = %if.end41 + %tmp53 = load i32*, i32** undef ; [#uses=1] + br label %while.body.backedge + +if.else: ; preds = %if.end41 + br i1 undef, label %if.then62, label %if.else67 + +if.then62: ; preds = %if.else + br label %while.body.backedge + +while.body.backedge: ; preds = %if.then62, %if.then50 + %o.addr.0.be = phi i32* [ %tmp53, %if.then50 ], [ undef, %if.then62 ] ; [#uses=1] + br label %while.body + +if.else67: ; preds = %if.else + ret i32* null + +return.loopexit: ; preds = %if.end31, %land.lhs.true24, %while.body + ret i32* undef +} + +declare i32* @parse_object(i8*) + + + + + + +%struct.attribute_spec = type { i8*, i32, i32, i8, i8, i8 } + +@attribute_tables = external global [4 x %struct.attribute_spec*] ; <[4 x %struct.attribute_spec*]*> [#uses=2] + +define void @test2() nounwind { +entry: + br label %bb69.i + +bb69.i: ; preds = %bb57.i.preheader + %tmp4 = 
getelementptr inbounds [4 x %struct.attribute_spec*], [4 x %struct.attribute_spec*]* @attribute_tables, i32 0, i32 undef ; <%struct.attribute_spec**> [#uses=1] + %tmp3 = load %struct.attribute_spec*, %struct.attribute_spec** %tmp4, align 4 ; <%struct.attribute_spec*> [#uses=1] + br label %bb65.i + +bb65.i: ; preds = %bb65.i.preheader, %bb64.i + %storemerge6.i = phi i32 [ 1, %bb64.i ], [ 0, %bb69.i ] ; [#uses=3] + %scevgep14 = getelementptr inbounds %struct.attribute_spec, %struct.attribute_spec* %tmp3, i32 %storemerge6.i, i32 0 ; [#uses=1] + %tmp2 = load i8*, i8** %scevgep14, align 4 ; [#uses=0] + %tmp = load %struct.attribute_spec*, %struct.attribute_spec** %tmp4, align 4 ; <%struct.attribute_spec*> [#uses=1] + %scevgep1516 = getelementptr inbounds %struct.attribute_spec, %struct.attribute_spec* %tmp, i32 %storemerge6.i, i32 0 ; [#uses=0] + unreachable + +bb64.i: ; Unreachable + br label %bb65.i + +bb66.i: ; Unreachable + br label %bb69.i +} + + + +; rdar://7438974 + +@g = external global i64, align 8 + +define i32* @test3() { +do.end17.i: + %tmp18.i = load i7*, i7** undef + %tmp1 = bitcast i7* %tmp18.i to i8* + br i1 undef, label %do.body36.i, label %if.then21.i + +if.then21.i: + %tmp2 = bitcast i7* %tmp18.i to i8* + ret i32* undef + +do.body36.i: + %ivar38.i = load i64, i64* @g + %tmp3 = bitcast i7* %tmp18.i to i8* + %add.ptr39.sum.i = add i64 %ivar38.i, 8 + %tmp40.i = getelementptr inbounds i8, i8* %tmp3, i64 %add.ptr39.sum.i + %tmp4 = bitcast i8* %tmp40.i to i64* + %tmp41.i = load i64, i64* %tmp4 + br i1 undef, label %if.then48.i, label %do.body57.i + +if.then48.i: + %call54.i = call i32 @foo2() + br label %do.body57.i + +do.body57.i: + %tmp58.i = load i7*, i7** undef + %ivar59.i = load i64, i64* @g + %tmp5 = bitcast i7* %tmp58.i to i8* + %add.ptr65.sum.i = add i64 %ivar59.i, 8 + %tmp66.i = getelementptr inbounds i8, i8* %tmp5, i64 %add.ptr65.sum.i + %tmp6 = bitcast i8* %tmp66.i to i64* + %tmp67.i = load i64, i64* %tmp6 + ret i32* undef +} + +declare i32 @foo2() + + + +define i32 @test4() { +entry: + ret i32 0 + +dead: + %P2 = getelementptr i32, i32 *%P2, i32 52 + %Q2 = getelementptr i32, i32 *%Q2, i32 52 + store i32 4, i32* %P2 + %A = load i32, i32* %Q2 + br i1 true, label %dead, label %dead2 + +dead2: + ret i32 %A +} + + +; PR9841 +define fastcc i8 @test5(i8* %P) nounwind { +entry: + %0 = load i8, i8* %P, align 2 + + %Q = getelementptr i8, i8* %P, i32 1 + %1 = load i8, i8* %Q, align 1 + ret i8 %1 +} + + +; Test that a GEP in an unreachable block with the following form doesn't crash +; GVN: +; +; %x = gep %some.type %x, ... + +%struct.type = type { i64, i32, i32 } + +define fastcc void @func() nounwind uwtable ssp align 2 { +entry: + br label %reachable.bb + +;; Unreachable code. + +unreachable.bb: + %gep.val = getelementptr inbounds %struct.type, %struct.type* %gep.val, i64 1 + br i1 undef, label %u2.bb, label %u1.bb + +u1.bb: + %tmp1 = getelementptr inbounds %struct.type, %struct.type* %gep.val, i64 0, i32 0 + store i64 -1, i64* %tmp1, align 8 + br label %unreachable.bb + +u2.bb: + %0 = load i32, i32* undef, align 4 + %conv.i.i.i.i.i = zext i32 %0 to i64 + br label %u2.bb + +;; Reachable code. 
+ +reachable.bb: + br label %r1.bb + +r1.bb: + br label %u2.bb +} diff --git a/llvm/test/Transforms/NewGVN/dbg-redundant-load.ll b/llvm/test/Transforms/NewGVN/dbg-redundant-load.ll new file mode 100644 index 0000000..11d5bde --- /dev/null +++ b/llvm/test/Transforms/NewGVN/dbg-redundant-load.ll @@ -0,0 +1,52 @@ +; RUN: opt -newgvn -S < %s | FileCheck %s + +; Check that the redundant load from %if.then is removed. +; Also, check that the debug location associated to load %0 still refers to +; line 3 and not line 6. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK: @test_redundant_load( +; CHECK-LABEL: entry: +; CHECK-NEXT: load i32, i32* %Y, align 4, !dbg ![[LOC:[0-9]+]] +; CHECK-LABEL: if.then: +; CHECK-NOT: load +; CHECK-LABEL: if.end: +; CHECK: ![[LOC]] = !DILocation(line: 3, scope: !{{.*}}) + +define i32 @test_redundant_load(i32 %X, i32* %Y) !dbg !6 { +entry: + %0 = load i32, i32* %Y, align 4, !dbg !8 + %cmp = icmp sgt i32 %X, -1, !dbg !9 + br i1 %cmp, label %if.then, label %if.end, !dbg !9 + +if.then: ; preds = %entry + %1 = load i32, i32* %Y, align 4, !dbg !10 + %add = add nsw i32 %0, %1, !dbg !10 + call void @foo(), !dbg !11 + br label %if.end, !dbg !12 + +if.end: ; preds = %if.then, %entry + %Result.0 = phi i32 [ %add, %if.then ], [ %0, %entry ] + ret i32 %Result.0, !dbg !13 +} + +declare void @foo() + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2) +!1 = !DIFile(filename: "test.cpp", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"PIC Level", i32 2} +!6 = distinct !DISubprogram(name: "test_redundant_load", scope: !1, file: !1, line: 2, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 3, scope: !6) +!9 = !DILocation(line: 5, scope: !6) +!10 = !DILocation(line: 6, scope: !6) +!11 = !DILocation(line: 7, scope: !6) +!12 = !DILocation(line: 8, scope: !6) +!13 = !DILocation(line: 10, scope: !6) diff --git a/llvm/test/Transforms/NewGVN/edge.ll b/llvm/test/Transforms/NewGVN/edge.ll new file mode 100644 index 0000000..2d453bd --- /dev/null +++ b/llvm/test/Transforms/NewGVN/edge.ll @@ -0,0 +1,171 @@ +; XFAIL: * +; RUN: opt -newgvn -S < %s | FileCheck %s + +define i32 @f1(i32 %x) { + ; CHECK-LABEL: define i32 @f1( +bb0: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %bb2, label %bb1 +bb1: + br label %bb2 +bb2: + %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ] + %foo = add i32 %cond, %x + ret i32 %foo + ; CHECK: bb2: + ; CHECK: ret i32 %x +} + +define i32 @f2(i32 %x) { + ; CHECK-LABEL: define i32 @f2( +bb0: + %cmp = icmp ne i32 %x, 0 + br i1 %cmp, label %bb1, label %bb2 +bb1: + br label %bb2 +bb2: + %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ] + %foo = add i32 %cond, %x + ret i32 %foo + ; CHECK: bb2: + ; CHECK: ret i32 %x +} + +define i32 @f3(i32 %x) { + ; CHECK-LABEL: define i32 @f3( +bb0: + switch i32 %x, label %bb1 [ i32 0, label %bb2] +bb1: + br label %bb2 +bb2: + %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ] + %foo = add i32 %cond, %x + ret i32 %foo + ; CHECK: bb2: + ; CHECK: ret i32 %x +} + +declare void @g(i1) +define void @f4(i8 * %x) { +; CHECK-LABEL: define void @f4( +bb0: + %y = icmp eq i8* null, %x + br i1 %y, label %bb2, label %bb1 +bb1: + br label %bb2 +bb2: + %zed = icmp eq i8* null, %x + call 
void @g(i1 %zed) +; CHECK: call void @g(i1 %y) + ret void +} + +define double @fcmp_oeq_not_zero(double %x, double %y) { +entry: + %cmp = fcmp oeq double %y, 2.0 + br i1 %cmp, label %if, label %return + +if: + %div = fdiv double %x, %y + br label %return + +return: + %retval = phi double [ %div, %if ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_oeq_not_zero( +; CHECK: %div = fdiv double %x, 2.0 +} + +define double @fcmp_une_not_zero(double %x, double %y) { +entry: + %cmp = fcmp une double %y, 2.0 + br i1 %cmp, label %return, label %else + +else: + %div = fdiv double %x, %y + br label %return + +return: + %retval = phi double [ %div, %else ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_une_not_zero( +; CHECK: %div = fdiv double %x, 2.0 +} + +; PR22376 - We can't propagate zero constants because -0.0 +; compares equal to 0.0. If %y is -0.0 in this test case, +; we would produce the wrong sign on the infinity return value. +define double @fcmp_oeq_zero(double %x, double %y) { +entry: + %cmp = fcmp oeq double %y, 0.0 + br i1 %cmp, label %if, label %return + +if: + %div = fdiv double %x, %y + br label %return + +return: + %retval = phi double [ %div, %if ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_oeq_zero( +; CHECK: %div = fdiv double %x, %y +} + +define double @fcmp_une_zero(double %x, double %y) { +entry: + %cmp = fcmp une double %y, -0.0 + br i1 %cmp, label %return, label %else + +else: + %div = fdiv double %x, %y + br label %return + +return: + %retval = phi double [ %div, %else ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_une_zero( +; CHECK: %div = fdiv double %x, %y +} + +; We also cannot propagate a value if it's not a constant. +; This is because the value could be 0.0 or -0.0. + +define double @fcmp_oeq_maybe_zero(double %x, double %y, double %z1, double %z2) { +entry: + %z = fadd double %z1, %z2 + %cmp = fcmp oeq double %y, %z + br i1 %cmp, label %if, label %return + +if: + %div = fdiv double %x, %z + br label %return + +return: + %retval = phi double [ %div, %if ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_oeq_maybe_zero( +; CHECK: %div = fdiv double %x, %z +} + +define double @fcmp_une_maybe_zero(double %x, double %y, double %z1, double %z2) { +entry: + %z = fadd double %z1, %z2 + %cmp = fcmp une double %y, %z + br i1 %cmp, label %return, label %else + +else: + %div = fdiv double %x, %z + br label %return + +return: + %retval = phi double [ %div, %else ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_une_maybe_zero( +; CHECK: %div = fdiv double %x, %z +} diff --git a/llvm/test/Transforms/NewGVN/fence.ll b/llvm/test/Transforms/NewGVN/fence.ll new file mode 100644 index 0000000..ac4270d --- /dev/null +++ b/llvm/test/Transforms/NewGVN/fence.ll @@ -0,0 +1,70 @@ +; XFAIL: * +; RUN: opt -S -basicaa -newgvn < %s | FileCheck %s + +; We can value forward across the fence since we can (semantically) +; reorder the following load before the fence. 
+define i32 @test(i32* %addr.i) {
+; CHECK-LABEL: @test
+; CHECK: store
+; CHECK: fence
+; CHECK-NOT: load
+; CHECK: ret
+  store i32 5, i32* %addr.i, align 4
+  fence release
+  %a = load i32, i32* %addr.i, align 4
+  ret i32 %a
+}
+
+; Same as above
+define i32 @test2(i32* %addr.i) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: fence
+; CHECK-NOT: load
+; CHECK: ret
+  %a = load i32, i32* %addr.i, align 4
+  fence release
+  %a2 = load i32, i32* %addr.i, align 4
+  %res = sub i32 %a, %a2
+  ret i32 %res
+}
+
+; We cannot value forward across an acquire barrier since we might
+; be synchronizing with another thread storing to the same variable
+; followed by a release fence. This is not so much enforcing an
+; ordering property (though it is that too), but a liveness
+; property. We expect to eventually see the value stored by
+; another thread when spinning on that location.
+define i32 @test3(i32* noalias %addr.i, i32* noalias %otheraddr) {
+; CHECK-LABEL: @test3
+; CHECK: load
+; CHECK: fence
+; CHECK: load
+; CHECK: ret i32 %res
+  ; the following code is intended to model the unrolling of
+  ; two iterations in a spin loop of the form:
+  ;   do { fence acquire; tmp = *%addr.i; } while (!tmp);
+  ; It's hopefully clear that allowing PRE to turn this into:
+  ;   if (!*%addr.i) while(true) {} would be unfortunate
+  fence acquire
+  %a = load i32, i32* %addr.i, align 4
+  fence acquire
+  %a2 = load i32, i32* %addr.i, align 4
+  %res = sub i32 %a, %a2
+  ret i32 %res
+}
+
+; Another example of why forwarding across an acquire fence is problematic
+; can be seen in a normal locking operation. Say we had:
+; *p = 5; unlock(l); lock(l); use(p);
+; forwarding the store to p would be invalid. A reasonable implementation
+; of unlock and lock might be:
+; unlock() { atomicrmw sub %l, 1 unordered; fence release }
+; lock() {
+;   do {
+;     %res = cmpxchg %p, 0, 1, monotonic monotonic
+;   } while(!%res.success)
+;   fence acquire;
+; }
+; Given we chose to forward across the release fence, we clearly can't forward
+; across the acquire fence as well.
+
diff --git a/llvm/test/Transforms/NewGVN/flags.ll b/llvm/test/Transforms/NewGVN/flags.ll
new file mode 100644
index 0000000..d03edd6
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/flags.ll
@@ -0,0 +1,19 @@
+; XFAIL: *
+; RUN: opt -newgvn -S < %s | FileCheck %s
+
+declare void @use(i1)
+
+define void @test1(float %x, float %y) {
+entry:
+  %cmp1 = fcmp nnan oeq float %y, %x
+  %cmp2 = fcmp oeq float %x, %y
+  call void @use(i1 %cmp1)
+  call void @use(i1 %cmp2)
+  ret void
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[cmp:.*]] = fcmp oeq float %y, %x
+; CHECK-NEXT: call void @use(i1 %[[cmp]])
+; CHECK-NEXT: call void @use(i1 %[[cmp]])
+; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/NewGVN/fold-const-expr.ll b/llvm/test/Transforms/NewGVN/fold-const-expr.ll
new file mode 100644
index 0000000..8e3b39a
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/fold-const-expr.ll
@@ -0,0 +1,100 @@
+; XFAIL: *
+; GVN failed to do constant expression folding and expanded
+; them unfolded in many places, producing exponentially large const
+; expressions. As a result, the compilation never finished.
+; This test checks that we are folding constant expression +; PR 28418 +; RUN: opt -newgvn -S < %s | FileCheck %s + +%2 = type { i32, i32, i32, i32, i32 } +define i32 @_Z16vector3util_mainv(i32 %x, i32 %y) { + %tmp1 = alloca %2, align 4 + %tmp114 = getelementptr inbounds %2, %2* %tmp1, i64 0, i32 1 + %tmp115 = bitcast i32* %tmp114 to <4 x i32>* + store <4 x i32> , <4 x i32>* %tmp115, align 4 + %tmp1683 = getelementptr inbounds %2, %2* %tmp1, i64 0, i32 1 + %tmp1688 = load i32, i32* %tmp1683, align 4 + %tmp1693 = shl i32 %tmp1688, 5 + %tmp1694 = xor i32 %tmp1693, %tmp1688 + %tmp1695 = lshr i32 %tmp1694, 7 + %tmp1696 = xor i32 %tmp1695, %tmp1694 + %tmp1697 = shl i32 %tmp1696, 22 + %tmp1698 = xor i32 %tmp1697, %tmp1696 + %tmp1707 = shl i32 %tmp1698, 5 + %tmp1708 = xor i32 %tmp1707, %tmp1698 + %tmp1709 = lshr i32 %tmp1708, 7 + %tmp1710 = xor i32 %tmp1709, %tmp1708 + %tmp1711 = shl i32 %tmp1710, 22 + %tmp1712 = xor i32 %tmp1711, %tmp1710 + %tmp1721 = shl i32 %tmp1712, 5 + %tmp1722 = xor i32 %tmp1721, %tmp1712 + %tmp1723 = lshr i32 %tmp1722, 7 + %tmp1724 = xor i32 %tmp1723, %tmp1722 + %tmp1725 = shl i32 %tmp1724, 22 + %tmp1726 = xor i32 %tmp1725, %tmp1724 + %tmp1735 = shl i32 %tmp1726, 5 + %tmp1736 = xor i32 %tmp1735, %tmp1726 + %tmp1737 = lshr i32 %tmp1736, 7 + %tmp1738 = xor i32 %tmp1737, %tmp1736 + %tmp1739 = shl i32 %tmp1738, 22 + %tmp1740 = xor i32 %tmp1739, %tmp1738 + store i32 %tmp1740, i32* %tmp1683, align 4 +; CHECK: store i32 310393545, i32* %tmp114, align 4 + %tmp1756 = getelementptr inbounds %2, %2* %tmp1, i64 0, i32 1 + %tmp1761 = load i32, i32* %tmp1756, align 4 + %tmp1766 = shl i32 %tmp1761, 5 + %tmp1767 = xor i32 %tmp1766, %tmp1761 + %tmp1768 = lshr i32 %tmp1767, 7 + %tmp1769 = xor i32 %tmp1768, %tmp1767 + %tmp1770 = shl i32 %tmp1769, 22 + %tmp1771 = xor i32 %tmp1770, %tmp1769 + %tmp1780 = shl i32 %tmp1771, 5 + %tmp1781 = xor i32 %tmp1780, %tmp1771 + %tmp1782 = lshr i32 %tmp1781, 7 + %tmp1783 = xor i32 %tmp1782, %tmp1781 + %tmp1784 = shl i32 %tmp1783, 22 + %tmp1785 = xor i32 %tmp1784, %tmp1783 + %tmp1794 = shl i32 %tmp1785, 5 + %tmp1795 = xor i32 %tmp1794, %tmp1785 + %tmp1796 = lshr i32 %tmp1795, 7 + %tmp1797 = xor i32 %tmp1796, %tmp1795 + %tmp1798 = shl i32 %tmp1797, 22 + %tmp1799 = xor i32 %tmp1798, %tmp1797 + %tmp1808 = shl i32 %tmp1799, 5 + %tmp1809 = xor i32 %tmp1808, %tmp1799 + %tmp1810 = lshr i32 %tmp1809, 7 + %tmp1811 = xor i32 %tmp1810, %tmp1809 + %tmp1812 = shl i32 %tmp1811, 22 + %tmp1813 = xor i32 %tmp1812, %tmp1811 + store i32 %tmp1813, i32* %tmp1756, align 4 +; CHECK: store i32 -383584258, i32* %tmp114, align 4 + %tmp2645 = getelementptr inbounds %2, %2* %tmp1, i64 0, i32 1 + %tmp2650 = load i32, i32* %tmp2645, align 4 + %tmp2655 = shl i32 %tmp2650, 5 + %tmp2656 = xor i32 %tmp2655, %tmp2650 + %tmp2657 = lshr i32 %tmp2656, 7 + %tmp2658 = xor i32 %tmp2657, %tmp2656 + %tmp2659 = shl i32 %tmp2658, 22 + %tmp2660 = xor i32 %tmp2659, %tmp2658 + %tmp2669 = shl i32 %tmp2660, 5 + %tmp2670 = xor i32 %tmp2669, %tmp2660 + %tmp2671 = lshr i32 %tmp2670, 7 + %tmp2672 = xor i32 %tmp2671, %tmp2670 + %tmp2673 = shl i32 %tmp2672, 22 + %tmp2674 = xor i32 %tmp2673, %tmp2672 + %tmp2683 = shl i32 %tmp2674, 5 + %tmp2684 = xor i32 %tmp2683, %tmp2674 + %tmp2685 = lshr i32 %tmp2684, 7 + %tmp2686 = xor i32 %tmp2685, %tmp2684 + %tmp2687 = shl i32 %tmp2686, 22 + %tmp2688 = xor i32 %tmp2687, %tmp2686 + %tmp2697 = shl i32 %tmp2688, 5 + %tmp2698 = xor i32 %tmp2697, %tmp2688 + %tmp2699 = lshr i32 %tmp2698, 7 + %tmp2700 = xor i32 %tmp2699, %tmp2698 + %tmp2701 = shl i32 %tmp2700, 22 + %tmp2702 = xor 
i32 %tmp2701, %tmp2700 + store i32 %tmp2702, i32* %tmp2645, align 4 +; CHECK: store i32 -57163022, i32* %tmp114, align 4 + ret i32 0 +} diff --git a/llvm/test/Transforms/NewGVN/fpmath.ll b/llvm/test/Transforms/NewGVN/fpmath.ll new file mode 100644 index 0000000..2ff8fff --- /dev/null +++ b/llvm/test/Transforms/NewGVN/fpmath.ll @@ -0,0 +1,45 @@ +; RUN: opt -newgvn -S < %s | FileCheck %s + +define double @test1(double %x, double %y) { +; CHECK: @test1(double %x, double %y) +; CHECK: %add1 = fadd double %x, %y +; CHECK-NOT: fpmath +; CHECK: %foo = fadd double %add1, %add1 + %add1 = fadd double %x, %y, !fpmath !0 + %add2 = fadd double %x, %y + %foo = fadd double %add1, %add2 + ret double %foo +} + +define double @test2(double %x, double %y) { +; CHECK: @test2(double %x, double %y) +; CHECK: %add1 = fadd double %x, %y, !fpmath !0 +; CHECK: %foo = fadd double %add1, %add1 + %add1 = fadd double %x, %y, !fpmath !0 + %add2 = fadd double %x, %y, !fpmath !0 + %foo = fadd double %add1, %add2 + ret double %foo +} + +define double @test3(double %x, double %y) { +; CHECK: @test3(double %x, double %y) +; CHECK: %add1 = fadd double %x, %y, !fpmath !1 +; CHECK: %foo = fadd double %add1, %add1 + %add1 = fadd double %x, %y, !fpmath !1 + %add2 = fadd double %x, %y, !fpmath !0 + %foo = fadd double %add1, %add2 + ret double %foo +} + +define double @test4(double %x, double %y) { +; CHECK: @test4(double %x, double %y) +; CHECK: %add1 = fadd double %x, %y, !fpmath !1 +; CHECK: %foo = fadd double %add1, %add1 + %add1 = fadd double %x, %y, !fpmath !0 + %add2 = fadd double %x, %y, !fpmath !1 + %foo = fadd double %add1, %add2 + ret double %foo +} + +!0 = !{ float 5.0 } +!1 = !{ float 2.5 } diff --git a/llvm/test/Transforms/NewGVN/funclet.ll b/llvm/test/Transforms/NewGVN/funclet.ll new file mode 100644 index 0000000..44cb3ae --- /dev/null +++ b/llvm/test/Transforms/NewGVN/funclet.ll @@ -0,0 +1,44 @@ +; RUN: opt -basicaa -newgvn -S < %s | FileCheck %s +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i686-pc-windows-msvc" + +%eh.ThrowInfo = type { i32, i8*, i8*, i8* } +%struct.A = type { i32* } + +@"_TI1?AUA@@" = external constant %eh.ThrowInfo + +define i8 @f() personality i32 (...)* @__CxxFrameHandler3 { +entry: + %b = alloca i8 + %c = alloca i8 + store i8 42, i8* %b + store i8 13, i8* %c + invoke void @_CxxThrowException(i8* %b, %eh.ThrowInfo* nonnull @"_TI1?AUA@@") + to label %unreachable unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %cs1 = catchswitch within none [label %catch] unwind to caller + +catch: ; preds = %catch.dispatch + %catchpad = catchpad within %cs1 [i8* null, i32 64, i8* null] + store i8 5, i8* %b + catchret from %catchpad to label %try.cont + +try.cont: ; preds = %catch + %load_b = load i8, i8* %b + %load_c = load i8, i8* %c + %add = add i8 %load_b, %load_c + ret i8 %add + +unreachable: ; preds = %entry + unreachable +} +; CHECK-LABEL: define i8 @f( +; CHECK: %[[load_b:.*]] = load i8, i8* %b +; CHECK-NEXT: %[[load_c:.*]] = load i8, i8* %c +; CHECK-NEXT: %[[add:.*]] = add i8 %[[load_b]], %[[load_c]] +; CHECK-NEXT: ret i8 %[[add]] + +declare i32 @__CxxFrameHandler3(...) 
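+; Note that the invoke above may unwind to the catch funclet, which stores 5
+; to %b before the catchret, so the entry-block store of 42 must not be
+; forwarded to %load_b in %try.cont; the CHECK lines above require both loads
+; to remain.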
+ +declare x86_stdcallcc void @_CxxThrowException(i8*, %eh.ThrowInfo*) diff --git a/llvm/test/Transforms/NewGVN/invariant.group.ll b/llvm/test/Transforms/NewGVN/invariant.group.ll new file mode 100644 index 0000000..1151092 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/invariant.group.ll @@ -0,0 +1,338 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s + +%struct.A = type { i32 (...)** } +@_ZTV1A = available_externally unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)], align 8 +@_ZTI1A = external constant i8* + +@unknownPtr = external global i8 + +; CHECK-LABEL: define i8 @simple() { +define i8 @simple() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + %a = load i8, i8* %ptr, !invariant.group !0 + %b = load i8, i8* %ptr, !invariant.group !0 + %c = load i8, i8* %ptr, !invariant.group !0 +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @optimizable1() { +define i8 @optimizable1() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 + + call void @foo(i8* %ptr2); call to use %ptr2 +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @optimizable2() { +define i8 @optimizable2() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + store i8 13, i8* %ptr ; can't use this store with invariant.group + %a = load i8, i8* %ptr + call void @bar(i8 %a) ; call to use %a + + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !0 + +; CHECK: ret i8 42 + ret i8 %b +} + +; CHECK-LABEL: define i8 @unoptimizable1() { +define i8 @unoptimizable1() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define void @indirectLoads() { +define void @indirectLoads() { +entry: + %a = alloca %struct.A*, align 8 + %0 = bitcast %struct.A** %a to i8* + + %call = call i8* @getPointer(i8* null) + %1 = bitcast i8* %call to %struct.A* + call void @_ZN1AC1Ev(%struct.A* %1) + %2 = bitcast %struct.A* %1 to i8*** + +; CHECK: %vtable = load {{.*}} !invariant.group + %vtable = load i8**, i8*** %2, align 8, !invariant.group !2 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) + call void @llvm.assume(i1 %cmp.vtables) + + store %struct.A* %1, %struct.A** %a, align 8 + %3 = load %struct.A*, %struct.A** %a, align 8 + %4 = bitcast %struct.A* %3 to void (%struct.A*)*** + +; CHECK: call void @_ZN1A3fooEv( + %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !2 + %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0 + %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8 + call void %5(%struct.A* %3) + %6 = load %struct.A*, %struct.A** %a, align 8 + %7 = bitcast %struct.A* %6 to void (%struct.A*)*** + +; CHECK: call void @_ZN1A3fooEv( + %vtable2 = load void (%struct.A*)**, void (%struct.A*)*** %7, align 8, !invariant.group !2 + %vfn3 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable2, i64 0 + %8 = load void (%struct.A*)*, void (%struct.A*)** %vfn3, align 8 + + call void %8(%struct.A* %6) + %9 = load %struct.A*, %struct.A** %a, align 8 + %10 = bitcast %struct.A* %9 to void (%struct.A*)*** + + %vtable4 = 
load void (%struct.A*)**, void (%struct.A*)*** %10, align 8, !invariant.group !2 + %vfn5 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable4, i64 0 + %11 = load void (%struct.A*)*, void (%struct.A*)** %vfn5, align 8 +; CHECK: call void @_ZN1A3fooEv( + call void %11(%struct.A* %9) + + %vtable5 = load i8**, i8*** %2, align 8, !invariant.group !2 + %vfn6 = getelementptr inbounds i8*, i8** %vtable5, i64 0 + %12 = bitcast i8** %vfn6 to void (%struct.A*)** + %13 = load void (%struct.A*)*, void (%struct.A*)** %12, align 8 +; CHECK: call void @_ZN1A3fooEv( + call void %13(%struct.A* %9) + + ret void +} + +; CHECK-LABEL: define void @combiningBitCastWithLoad() { +define void @combiningBitCastWithLoad() { +entry: + %a = alloca %struct.A*, align 8 + %0 = bitcast %struct.A** %a to i8* + + %call = call i8* @getPointer(i8* null) + %1 = bitcast i8* %call to %struct.A* + call void @_ZN1AC1Ev(%struct.A* %1) + %2 = bitcast %struct.A* %1 to i8*** + +; CHECK: %vtable = load {{.*}} !invariant.group + %vtable = load i8**, i8*** %2, align 8, !invariant.group !2 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) + + store %struct.A* %1, %struct.A** %a, align 8 +; CHECK-NOT: !invariant.group + %3 = load %struct.A*, %struct.A** %a, align 8 + %4 = bitcast %struct.A* %3 to void (%struct.A*)*** + + %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !2 + %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0 + %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8 + call void %5(%struct.A* %3) + + ret void +} + +; CHECK-LABEL:define void @loadCombine() { +define void @loadCombine() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[A:.*]] = load i8, i8* %ptr, !invariant.group + %a = load i8, i8* %ptr, !invariant.group !0 +; CHECK-NOT: load + %b = load i8, i8* %ptr, !invariant.group !1 +; CHECK: call void @bar(i8 %[[A]]) + call void @bar(i8 %a) +; CHECK: call void @bar(i8 %[[A]]) + call void @bar(i8 %b) + ret void +} + +; CHECK-LABEL: define void @loadCombine1() { +define void @loadCombine1() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[D:.*]] = load i8, i8* %ptr, !invariant.group + %c = load i8, i8* %ptr +; CHECK-NOT: load + %d = load i8, i8* %ptr, !invariant.group !1 +; CHECK: call void @bar(i8 %[[D]]) + call void @bar(i8 %c) +; CHECK: call void @bar(i8 %[[D]]) + call void @bar(i8 %d) + ret void +} + +; CHECK-LABEL: define void @loadCombine2() { +define void @loadCombine2() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group + %e = load i8, i8* %ptr, !invariant.group !1 +; CHECK-NOT: load + %f = load i8, i8* %ptr +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %e) +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %f) + ret void +} + +; CHECK-LABEL: define void @loadCombine3() { +define void @loadCombine3() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group ![[OneMD:[0-9]]] + %e = load i8, i8* %ptr, !invariant.group !1 +; CHECK-NOT: load + %f = load i8, i8* %ptr, !invariant.group !1 +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %e) +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %f) + ret void +} + +; CHECK-LABEL: define i8 @unoptimizable2() { +define i8 @unoptimizable2() { 
+entry: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !0 + +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define i8 @unoptimizable3() { +define i8 @unoptimizable3() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + %ptr2 = call i8* @getPointer(i8* %ptr) + %a = load i8, i8* %ptr2, !invariant.group !0 + +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define i8 @unoptimizable4() { +define i8 @unoptimizable4() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %a = load i8, i8* %ptr2, !invariant.group !0 + +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define i8 @volatile1() { +define i8 @volatile1() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 + %b = load volatile i8, i8* %ptr +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %b) + + %c = load volatile i8, i8* %ptr, !invariant.group !0 +; FIXME: we could change %c to 42, preserving volatile load +; CHECK: call void @bar(i8 %c) + call void @bar(i8 %c) +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @volatile2() { +define i8 @volatile2() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 + %b = load volatile i8, i8* %ptr +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %b) + + %c = load volatile i8, i8* %ptr, !invariant.group !0 +; FIXME: we could change %c to 42, preserving volatile load +; CHECK: call void @bar(i8 %c) + call void @bar(i8 %c) +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @fun() { +define i8 @fun() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + %a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change +; CHECK: call void @bar(i8 42) + call void @bar(i8 %a) + + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %b) + + %newPtr = call i8* @getPointer(i8* %ptr) + %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr +; CHECK: call void @bar(i8 %c) + call void @bar(i8 %c) + + %unknownValue = load i8, i8* @unknownPtr +; FIXME: Can assume that %unknownValue == 42 +; CHECK: store i8 %unknownValue, i8* %ptr, !invariant.group !0 + store i8 %unknownValue, i8* %ptr, !invariant.group !0 + + %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %d = load i8, i8* %newPtr2, !invariant.group !0 ; Can't step through invariant.group.barrier to get value of %ptr +; CHECK: ret i8 %d + ret i8 %d +} + +declare void @foo(i8*) +declare void @bar(i8) +declare i8* @getPointer(i8*) +declare void @_ZN1A3fooEv(%struct.A*) +declare void @_ZN1AC1Ev(%struct.A*) +declare i8* @llvm.invariant.group.barrier(i8*) + +; Function Attrs: nounwind +declare void @llvm.assume(i1 %cmp.vtables) #0 + + +attributes #0 = { nounwind } +; CHECK: ![[OneMD]] = !{!"other ptr"} +!0 = !{!"magic ptr"} +!1 = !{!"other ptr"} +!2 = !{!"vtable_of_a"} diff --git a/llvm/test/Transforms/NewGVN/invariant.start.ll b/llvm/test/Transforms/NewGVN/invariant.start.ll new file mode 100644 index 0000000..69c8901 --- /dev/null +++ 
b/llvm/test/Transforms/NewGVN/invariant.start.ll @@ -0,0 +1,59 @@ +; Test to make sure llvm.invariant.start calls are not treated as clobbers. +; RUN: opt < %s -newgvn -S | FileCheck %s + + +declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly +declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind + +; We forward store to the load across the invariant.start intrinsic +define i8 @forward_store() { +; CHECK-LABEL: @forward_store +; CHECK: call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) +; CHECK-NOT: load +; CHECK: ret i8 0 + %a = alloca i8 + store i8 0, i8* %a + %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) + %r = load i8, i8* %a + ret i8 %r +} + +declare i8 @dummy(i8* nocapture) nounwind readonly + +; We forward store to the load in the non-local analysis case, +; i.e. invariant.start is in another basic block. +define i8 @forward_store_nonlocal(i1 %cond) { +; CHECK-LABEL: forward_store_nonlocal +; CHECK: call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) +; CHECK: ret i8 0 +; CHECK: ret i8 %val + %a = alloca i8 + store i8 0, i8* %a + %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) + br i1 %cond, label %loadblock, label %exit + +loadblock: + %r = load i8, i8* %a + ret i8 %r + +exit: + %val = call i8 @dummy(i8* %a) + ret i8 %val +} + +; We should not value forward %foo to the invariant.end corresponding to %bar. +define i8 @forward_store1() { +; CHECK-LABEL: forward_store1 +; CHECK: %foo = call {}* @llvm.invariant.start.p0i8 +; CHECK-NOT: load +; CHECK: %bar = call {}* @llvm.invariant.start.p0i8 +; CHECK: call void @llvm.invariant.end.p0i8({}* %bar, i64 1, i8* %a) +; CHECK: ret i8 0 + %a = alloca i8 + store i8 0, i8* %a + %foo = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) + %r = load i8, i8* %a + %bar = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) + call void @llvm.invariant.end.p0i8({}* %bar, i64 1, i8* %a) + ret i8 %r +} diff --git a/llvm/test/Transforms/NewGVN/lifetime-simple.ll b/llvm/test/Transforms/NewGVN/lifetime-simple.ll new file mode 100644 index 0000000..63e361c --- /dev/null +++ b/llvm/test/Transforms/NewGVN/lifetime-simple.ll @@ -0,0 +1,20 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + +define i8 @test(i8* %P) nounwind { +; CHECK: lifetime.start +; CHECK-NOT: load +; CHECK: lifetime.end +entry: + call void @llvm.lifetime.start(i64 32, i8* %P) + %0 = load i8, i8* %P + store i8 1, i8* %P + call void @llvm.lifetime.end(i64 32, i8* %P) + %1 = load i8, i8* %P + ret i8 %1 +} + +declare void @llvm.lifetime.start(i64 %S, i8* nocapture %P) readonly +declare void @llvm.lifetime.end(i64 %S, i8* nocapture %P) diff --git a/llvm/test/Transforms/NewGVN/load-constant-mem.ll b/llvm/test/Transforms/NewGVN/load-constant-mem.ll new file mode 100644 index 0000000..215258b --- /dev/null +++ b/llvm/test/Transforms/NewGVN/load-constant-mem.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -basicaa -newgvn -instcombine -S | FileCheck %s +; PR4189 +@G = external constant [4 x i32] + +define i32 @test(i8* %p, i32 %i) nounwind { +entry: + %P = getelementptr [4 x i32], [4 x i32]* @G, i32 0, i32 %i + %A = load i32, i32* %P + store i8 4, i8* %p + %B = load i32, i32* %P + %C = sub i32 %A, %B + ret i32 %C +} + +; CHECK: define i32 @test(i8* %p, i32 %i) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: store i8 4, i8* %p, align 1 +; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: 
} diff --git a/llvm/test/Transforms/NewGVN/load-from-unreachable-predecessor.ll b/llvm/test/Transforms/NewGVN/load-from-unreachable-predecessor.ll new file mode 100644 index 0000000..2098c90 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/load-from-unreachable-predecessor.ll @@ -0,0 +1,20 @@ +; RUN: opt -newgvn -S < %s | FileCheck %s + +; Check that an unreachable predecessor to a PHI node doesn't cause a crash. +; PR21625. + +define i32 @f(i32** %f) { +; CHECK: bb0: +; Load should be removed, since it's ignored. +; CHECK-NEXT: br label +bb0: + %bar = load i32*, i32** %f + br label %bb2 +bb1: + %zed = load i32*, i32** %f + br i1 false, label %bb1, label %bb2 +bb2: + %foo = phi i32* [ null, %bb0 ], [ %zed, %bb1 ] + %storemerge = load i32, i32* %foo + ret i32 %storemerge +} diff --git a/llvm/test/Transforms/NewGVN/malloc-load-removal.ll b/llvm/test/Transforms/NewGVN/malloc-load-removal.ll new file mode 100644 index 0000000..c91b6e1 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/malloc-load-removal.ll @@ -0,0 +1,57 @@ +; XFAIL: * +; RUN: opt -S -basicaa -newgvn < %s | FileCheck %s +; RUN: opt -S -basicaa -newgvn -disable-simplify-libcalls < %s | FileCheck %s -check-prefix=CHECK_NO_LIBCALLS +; PR13694 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +declare i8* @malloc(i64) nounwind + +define noalias i8* @test1() nounwind uwtable ssp { +entry: + %call = tail call i8* @malloc(i64 100) nounwind + %0 = load i8, i8* %call, align 1 + %tobool = icmp eq i8 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i8 0, i8* %call, align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i8* %call + +; CHECK-LABEL: @test1( +; CHECK-NOT: load +; CHECK-NOT: icmp + +; CHECK_NO_LIBCALLS-LABEL: @test1( +; CHECK_NO_LIBCALLS: load +; CHECK_NO_LIBCALLS: icmp +} + +declare i8* @_Znwm(i64) nounwind + +define noalias i8* @test2() nounwind uwtable ssp { +entry: + %call = tail call i8* @_Znwm(i64 100) nounwind + %0 = load i8, i8* %call, align 1 + %tobool = icmp eq i8 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i8 0, i8* %call, align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i8* %call + +; CHECK-LABEL: @test2( +; CHECK-NOT: load +; CHECK-NOT: icmp + +; CHECK_NO_LIBCALLS-LABEL: @test2( +; CHECK_NO_LIBCALLS: load +; CHECK_NO_LIBCALLS: icmp +} diff --git a/llvm/test/Transforms/NewGVN/no_speculative_loads_with_asan.ll b/llvm/test/Transforms/NewGVN/no_speculative_loads_with_asan.ll new file mode 100644 index 0000000..a83d7b6 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/no_speculative_loads_with_asan.ll @@ -0,0 +1,55 @@ +; RUN: opt -O3 -S %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +declare noalias i8* @_Znam(i64) #1 + +define i32 @TestNoAsan() { + %1 = tail call noalias i8* @_Znam(i64 2) + %2 = getelementptr inbounds i8, i8* %1, i64 1 + store i8 0, i8* %2, align 1 + store i8 0, i8* %1, align 1 + %3 = bitcast i8* %1 to i16* + %4 = load i16, i16* %3, align 4 + %5 = icmp eq i16 %4, 0 + br i1 %5, label %11, label %6 + +;