From 5bbb5aafc10c3a164e966afcd30f5eace4ee033a Mon Sep 17 00:00:00 2001 From: Wei Mi Date: Sat, 27 May 2017 00:54:19 +0000 Subject: [PATCH] [GVN] Recommit the patch "Add phi-translate support in scalarpre". The recommit is to fix a bug about ExtractValue and InsertValue ops. For those ops, some varargs inside GVN::Expression are not value numbers but raw index numbers. It is wrong to do phi-translate for raw index numbers, and the fix is to stop doing that. Right now scalarpre doesn't have phi-translate support, so it will miss some simple pre opportunities. Like the following testcase, current scalarpre cannot recognize the last "a * b" is fully redundent because a and b used by the last "a * b" expr are both defined by phis. long a[100], b[100], g1, g2, g3; __attribute__((pure)) long goo(); void foo(long a, long b, long c, long d) { g1 = a * b; if (__builtin_expect(g2 > 3, 0)) { a = c; b = d; g2 = a * b; } g3 = a * b; // fully redundant. } The patch adds phi-translate support in scalarpre. This is only a temporary solution before the newpre based on newgvn is available. Differential Revision: https://reviews.llvm.org/D32252 llvm-svn: 304050 --- llvm/include/llvm/Transforms/Scalar/GVN.h | 27 +++- llvm/lib/Transforms/Scalar/GVN.cpp | 164 +++++++++++++++++++++--- llvm/test/Transforms/GVN/PRE/phi-translate-2.ll | 105 +++++++++++++++ llvm/test/Transforms/GVN/PRE/pre-gep-load.ll | 2 +- llvm/test/Transforms/GVN/PRE/pre-load.ll | 6 +- 5 files changed, 278 insertions(+), 26 deletions(-) create mode 100644 llvm/test/Transforms/GVN/PRE/phi-translate-2.ll diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h index 589aaac..3f97789 100644 --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -68,6 +68,24 @@ public: class ValueTable { DenseMap valueNumbering; DenseMap expressionNumbering; + + // Expressions is the vector of Expression. ExprIdx is the mapping from + // value number to the index of Expression in Expressions. We use it + // instead of a DenseMap because filling such mapping is faster than + // filling a DenseMap and the compile time is a little better. + uint32_t nextExprNumber; + std::vector Expressions; + std::vector ExprIdx; + // Value number to PHINode mapping. Used for phi-translate in scalarpre. + DenseMap NumberingPhi; + // Cache for phi-translate in scalarpre. + typedef DenseMap, uint32_t> + PhiTranslateMap; + PhiTranslateMap PhiTranslateTable; + // Map the block to reversed postorder traversal number. It is used to + // find back edge easily. + DenseMap BlockRPONumber; + AliasAnalysis *AA; MemoryDependenceResults *MD; DominatorTree *DT; @@ -79,6 +97,10 @@ public: Value *LHS, Value *RHS); Expression createExtractvalueExpr(ExtractValueInst *EI); uint32_t lookupOrAddCall(CallInst *C); + uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock, + uint32_t Num, GVN &Gvn); + std::pair assignExpNewValueNum(Expression &exp); + bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVN &Gvn); public: ValueTable(); @@ -87,9 +109,12 @@ public: ~ValueTable(); uint32_t lookupOrAdd(Value *V); - uint32_t lookup(Value *V) const; + uint32_t lookup(Value *V, bool Verify = true) const; uint32_t lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Pred, Value *LHS, Value *RHS); + uint32_t phiTranslate(const BasicBlock *BB, const BasicBlock *PhiBlock, + uint32_t Num, GVN &Gvn); + void assignBlockRPONumber(Function &F); bool exists(Value *V) const; void add(Value *V, uint32_t num); void clear(); diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 0490d93..0d6e053 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -80,9 +80,10 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore, struct llvm::GVN::Expression { uint32_t opcode; Type *type; + bool commutative; SmallVector varargs; - Expression(uint32_t o = ~2U) : opcode(o) {} + Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {} bool operator==(const Expression &other) const { if (opcode != other.opcode) @@ -246,6 +247,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); + e.commutative = true; } if (CmpInst *C = dyn_cast(I)) { @@ -256,6 +258,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (C->getOpcode() << 8) | Predicate; + e.commutative = true; } else if (InsertValueInst *E = dyn_cast(I)) { for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); II != IE; ++II) @@ -281,6 +284,7 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode, Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (Opcode << 8) | Predicate; + e.commutative = true; return e; } @@ -348,25 +352,25 @@ GVN::ValueTable::~ValueTable() = default; /// add - Insert a value into the table with a specified value number. void GVN::ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); + if (PHINode *PN = dyn_cast(V)) + NumberingPhi[num] = PN; } uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { if (AA->doesNotAccessMemory(C)) { Expression exp = createExpr(C); - uint32_t &e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; + uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[C] = e; return e; } else if (AA->onlyReadsMemory(C)) { Expression exp = createExpr(C); - uint32_t &e = expressionNumbering[exp]; - if (!e) { - e = nextValueNumber++; - valueNumbering[C] = e; - return e; + auto ValNum = assignExpNewValueNum(exp); + if (ValNum.second) { + valueNumbering[C] = ValNum.first; + return ValNum.first; } if (!MD) { - e = nextValueNumber++; + uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[C] = e; return e; } @@ -522,23 +526,29 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) { case Instruction::ExtractValue: exp = createExtractvalueExpr(cast(I)); break; + case Instruction::PHI: + valueNumbering[V] = nextValueNumber; + NumberingPhi[nextValueNumber] = cast(V); + return nextValueNumber++; default: valueNumbering[V] = nextValueNumber; return nextValueNumber++; } - uint32_t& e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; + uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[V] = e; return e; } /// Returns the value number of the specified value. Fails if /// the value has not yet been numbered. -uint32_t GVN::ValueTable::lookup(Value *V) const { +uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const { DenseMap::const_iterator VI = valueNumbering.find(V); - assert(VI != valueNumbering.end() && "Value not numbered?"); - return VI->second; + if (Verify) { + assert(VI != valueNumbering.end() && "Value not numbered?"); + return VI->second; + } + return (VI != valueNumbering.end()) ? VI->second : 0; } /// Returns the value number of the given comparison, @@ -549,21 +559,29 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) { Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS); - uint32_t& e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; - return e; + return assignExpNewValueNum(exp).first; } /// Remove all entries from the ValueTable. void GVN::ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); + NumberingPhi.clear(); + PhiTranslateTable.clear(); + BlockRPONumber.clear(); nextValueNumber = 1; + Expressions.clear(); + ExprIdx.clear(); + nextExprNumber = 0; } /// Remove a value from the value numbering. void GVN::ValueTable::erase(Value *V) { + uint32_t Num = valueNumbering.lookup(V); valueNumbering.erase(V); + // If V is PHINode, V <--> value number is an one-to-one mapping. + if (isa(V)) + NumberingPhi.erase(Num); } /// verifyRemoved - Verify that the value is removed from all internal data @@ -1451,6 +1469,104 @@ bool GVN::processLoad(LoadInst *L) { return false; } +/// Return a pair the first field showing the value number of \p Exp and the +/// second field showing whether it is a value number newly created. +std::pair +GVN::ValueTable::assignExpNewValueNum(Expression &Exp) { + uint32_t &e = expressionNumbering[Exp]; + bool CreateNewValNum = !e; + if (CreateNewValNum) { + Expressions.push_back(Exp); + if (ExprIdx.size() < nextValueNumber + 1) + ExprIdx.resize(nextValueNumber * 2); + e = nextValueNumber; + ExprIdx[nextValueNumber++] = nextExprNumber++; + } + return {e, CreateNewValNum}; +} + +void GVN::ValueTable::assignBlockRPONumber(Function &F) { + uint32_t NextBlockNumber = 1; + ReversePostOrderTraversal RPOT(&F); + for (BasicBlock *BB : RPOT) + BlockRPONumber[BB] = NextBlockNumber++; +} + +/// Return whether all the values related with the same \p num are +/// defined in \p BB. +bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, + GVN &Gvn) { + LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; + while (Vals && Vals->BB == BB) + Vals = Vals->Next; + return !Vals; +} + +/// Wrap phiTranslateImpl to provide caching functionality. +uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred, + const BasicBlock *PhiBlock, uint32_t Num, + GVN &Gvn) { + auto FindRes = PhiTranslateTable.find({Num, Pred}); + if (FindRes != PhiTranslateTable.end()) + return FindRes->second; + uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn); + PhiTranslateTable.insert({{Num, Pred}, NewNum}); + return NewNum; +} + +/// Translate value number \p Num using phis, so that it has the values of +/// the phis in BB. +uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, + const BasicBlock *PhiBlock, + uint32_t Num, GVN &Gvn) { + if (PHINode *PN = NumberingPhi[Num]) { + if (BlockRPONumber[Pred] >= BlockRPONumber[PhiBlock]) + return Num; + for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { + if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred) + if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false)) + return TransVal; + } + return Num; + } + + // If there is any value related with Num is defined in a BB other than + // PhiBlock, it cannot depend on a phi in PhiBlock without going through + // a backedge. We can do an early exit in that case to save compile time. + if (!areAllValsInBB(Num, PhiBlock, Gvn)) + return Num; + + if (ExprIdx[Num] == 0 || Num >= ExprIdx.size()) + return Num; + Expression Exp = Expressions[ExprIdx[Num]]; + + for (unsigned i = 0; i < Exp.varargs.size(); i++) { + // For InsertValue and ExtractValue, some varargs are index numbers + // instead of value numbers. Those index numbers should not be + // translated. + if ((i > 1 && Exp.opcode == Instruction::InsertValue) || + (i > 0 && Exp.opcode == Instruction::ExtractValue)) + continue; + Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn); + } + + if (Exp.commutative) { + assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!"); + if (Exp.varargs[0] > Exp.varargs[1]) { + std::swap(Exp.varargs[0], Exp.varargs[1]); + uint32_t Opcode = Exp.opcode >> 8; + if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) + Exp.opcode = (Opcode << 8) | + CmpInst::getSwappedPredicate( + static_cast(Exp.opcode & 255)); + } + } + + if (uint32_t NewNum = expressionNumbering[Exp]) + return NewNum; + return Num; +} + // In order to find a leader for a given value number at a // specific basic block, we first obtain the list of all Values for that number, // and then scan the list to find one whose block dominates the block in @@ -1856,6 +1972,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, // Fabricate val-num for dead-code in order to suppress assertion in // performPRE(). assignValNumForDeadCode(); + VN.assignBlockRPONumber(F); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -1945,7 +2062,9 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, success = false; break; } - if (Value *V = findLeader(Pred, VN.lookup(Op))) { + uint32_t TValNo = + VN.phiTranslate(Pred, Instr->getParent(), VN.lookup(Op), *this); + if (Value *V = findLeader(Pred, TValNo)) { Instr->setOperand(i, V); } else { success = false; @@ -1962,10 +2081,12 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, Instr->insertBefore(Pred->getTerminator()); Instr->setName(Instr->getName() + ".pre"); Instr->setDebugLoc(Instr->getDebugLoc()); - VN.add(Instr, ValNo); + + unsigned Num = VN.lookupOrAdd(Instr); + VN.add(Instr, Num); // Update the availability map to include the new instruction. - addToLeaderTable(ValNo, Instr, Pred); + addToLeaderTable(Num, Instr, Pred); return true; } @@ -2014,7 +2135,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) { break; } - Value *predV = findLeader(P, ValNo); + uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this); + Value *predV = findLeader(P, TValNo); if (!predV) { predMap.push_back(std::make_pair(static_cast(nullptr), P)); PREPred = P; diff --git a/llvm/test/Transforms/GVN/PRE/phi-translate-2.ll b/llvm/test/Transforms/GVN/PRE/phi-translate-2.ll new file mode 100644 index 0000000..b299365 --- /dev/null +++ b/llvm/test/Transforms/GVN/PRE/phi-translate-2.ll @@ -0,0 +1,105 @@ +; RUN: opt < %s -gvn -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@a = common global [100 x i64] zeroinitializer, align 16 +@b = common global [100 x i64] zeroinitializer, align 16 +@g1 = common global i64 0, align 8 +@g2 = common global i64 0, align 8 +@g3 = common global i64 0, align 8 +declare i64 @goo(...) local_unnamed_addr #1 + +define void @test1(i64 %a, i64 %b, i64 %c, i64 %d) { +entry: + %mul = mul nsw i64 %b, %a + store i64 %mul, i64* @g1, align 8 + %t0 = load i64, i64* @g2, align 8 + %cmp = icmp sgt i64 %t0, 3 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %mul2 = mul nsw i64 %d, %c + store i64 %mul2, i64* @g2, align 8 + br label %if.end + +; Check phi-translate works and mul is removed. +; CHECK-LABEL: @test1( +; CHECK: if.end: +; CHECK: %[[MULPHI:.*]] = phi i64 [ {{.*}}, %if.then ], [ %mul, %entry ] +; CHECK-NOT: = mul +; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8 +if.end: ; preds = %if.then, %entry + %b.addr.0 = phi i64 [ %d, %if.then ], [ %b, %entry ] + %a.addr.0 = phi i64 [ %c, %if.then ], [ %a, %entry ] + %mul3 = mul nsw i64 %a.addr.0, %b.addr.0 + store i64 %mul3, i64* @g3, align 8 + ret void +} + +define void @test2(i64 %i) { +entry: + %arrayidx = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i + %t0 = load i64, i64* %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i + %t1 = load i64, i64* %arrayidx1, align 8 + %mul = mul nsw i64 %t1, %t0 + store i64 %mul, i64* @g1, align 8 + %cmp = icmp sgt i64 %mul, 3 + br i1 %cmp, label %if.then, label %if.end + +; Check phi-translate works for the phi generated by loadpre. A new mul will be +; inserted in if.then block. +; CHECK-LABEL: @test2( +; CHECK: if.then: +; CHECK: %[[MUL_THEN:.*]] = mul +; CHECK: br label %if.end +if.then: ; preds = %entry + %call = tail call i64 (...) @goo() #2 + store i64 %call, i64* @g2, align 8 + br label %if.end + +; CHECK: if.end: +; CHECK: %[[MULPHI:.*]] = phi i64 [ %[[MUL_THEN]], %if.then ], [ %mul, %entry ] +; CHECK-NOT: = mul +; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8 +if.end: ; preds = %if.then, %entry + %i.addr.0 = phi i64 [ 3, %if.then ], [ %i, %entry ] + %arrayidx3 = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i.addr.0 + %t2 = load i64, i64* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i.addr.0 + %t3 = load i64, i64* %arrayidx4, align 8 + %mul5 = mul nsw i64 %t3, %t2 + store i64 %mul5, i64* @g3, align 8 + ret void +} + +; Check phi-translate doesn't go through backedge, which may lead to incorrect +; pre transformation. +; CHECK: for.end: +; CHECK-NOT: %{{.*pre-phi}} = phi +; CHECK: ret void +define void @test3(i64 %N, i64* nocapture readonly %a) { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i64 [ 0, %entry ], [ %add, %for.body ] + %add = add nuw nsw i64 %i.0, 1 + %arrayidx = getelementptr inbounds i64, i64* %a, i64 %add + %tmp0 = load i64, i64* %arrayidx, align 8 + %cmp = icmp slt i64 %i.0, %N + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %call = tail call i64 (...) @goo() #2 + %add1 = sub nsw i64 0, %call + %tobool = icmp eq i64 %tmp0, %add1 + br i1 %tobool, label %for.cond, label %for.end + +for.end: ; preds = %for.body, %for.cond + %i.0.lcssa = phi i64 [ %i.0, %for.body ], [ %i.0, %for.cond ] + %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %i.0.lcssa + %tmp1 = load i64, i64* %arrayidx2, align 8 + store i64 %tmp1, i64* @g1, align 8 + ret void +} + diff --git a/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll b/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll index 9eec8bb..1b2b4d2 100644 --- a/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll @@ -37,7 +37,7 @@ sw.bb2: ; preds = %if.end, %entry %3 = load double, double* %arrayidx5, align 8 ; CHECK: sw.bb2: ; CHECK-NOT: sext -; CHECK-NEXT: phi double [ +; CHECK: phi double [ ; CHECK-NOT: load %sub6 = fsub double 3.000000e+00, %3 br label %return diff --git a/llvm/test/Transforms/GVN/PRE/pre-load.ll b/llvm/test/Transforms/GVN/PRE/pre-load.ll index 685df24..ffff2b7 100644 --- a/llvm/test/Transforms/GVN/PRE/pre-load.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-load.ll @@ -72,7 +72,7 @@ block4: %PRE = load i32, i32* %P3 ret i32 %PRE ; CHECK: block4: -; CHECK-NEXT: phi i32 [ +; CHECK: phi i32 [ ; CHECK-NOT: load ; CHECK: ret i32 } @@ -104,7 +104,7 @@ block4: %PRE = load i32, i32* %P3 ret i32 %PRE ; CHECK: block4: -; CHECK-NEXT: phi i32 [ +; CHECK: phi i32 [ ; CHECK-NOT: load ; CHECK: ret i32 } @@ -263,7 +263,7 @@ block4: %PRE = load i32, i32* %P3 ret i32 %PRE ; CHECK: block4: -; CHECK-NEXT: phi i32 [ +; CHECK: phi i32 [ ; CHECK-NOT: load ; CHECK: ret i32 } -- 2.7.4