From f3254838e47aa0afc36c8ba5af3207c5912972be Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 30 Oct 2012 11:23:25 +0000 Subject: [PATCH] Use TargetTransformInfo to control switch-to-lookup table transformation When the switch-to-lookup tables transform landed in SimplifyCFG, it was pointed out that this could be inappropriate for some targets. Since there was no way at the time for the pass to know anything about the target, an awkward reverse-transform was added in CodeGenPrepare that turned lookup tables back into switches for some targets. This patch uses the new TargetTransformInfo to determine if a switch should be transformed, and removes CodeGenPrepare::ConvertLoadToSwitch. llvm-svn: 167011 --- llvm/include/llvm/Target/TargetTransformImpl.h | 2 + llvm/include/llvm/TargetTransformInfo.h | 5 + llvm/include/llvm/Transforms/Utils/Local.h | 4 +- llvm/lib/Target/TargetTransformImpl.cpp | 7 +- llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp | 114 +-------------------- llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 12 ++- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 21 ++-- llvm/test/CodeGen/SPARC/load_to_switch.ll | 84 --------------- .../Transforms/SimplifyCFG/SPARC/lit.local.cfg | 6 ++ .../SimplifyCFG/SPARC/switch_to_lookup_table.ll | 32 ++++++ 10 files changed, 79 insertions(+), 208 deletions(-) delete mode 100644 llvm/test/CodeGen/SPARC/load_to_switch.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg create mode 100644 llvm/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll diff --git a/llvm/include/llvm/Target/TargetTransformImpl.h b/llvm/include/llvm/Target/TargetTransformImpl.h index fd4b737..fa1acbe 100644 --- a/llvm/include/llvm/Target/TargetTransformImpl.h +++ b/llvm/include/llvm/Target/TargetTransformImpl.h @@ -46,6 +46,8 @@ public: virtual unsigned getJumpBufAlignment() const; virtual unsigned getJumpBufSize() const; + + virtual bool shouldBuildLookupTables() const; }; class VectorTargetTransformImpl : public VectorTargetTransformInfo { diff --git a/llvm/include/llvm/TargetTransformInfo.h b/llvm/include/llvm/TargetTransformInfo.h index 6ee3b37..c65ef17 100644 --- a/llvm/include/llvm/TargetTransformInfo.h +++ b/llvm/include/llvm/TargetTransformInfo.h @@ -117,6 +117,11 @@ public: virtual unsigned getJumpBufSize() const { return 0; } + /// shouldBuildLookupTables - Return true if switches should be turned into + /// lookup tables for the target. + virtual bool shouldBuildLookupTables() const { + return true; + } }; /// VectorTargetTransformInfo - This interface is used by the vectorizers diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index 49eeb57..5c804d8 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -37,6 +37,7 @@ class AllocaInst; class ConstantExpr; class DataLayout; class TargetLibraryInfo; +class TargetTransformInfo; class DIBuilder; template class SmallVectorImpl; @@ -134,7 +135,8 @@ bool EliminateDuplicatePHINodes(BasicBlock *BB); /// of the CFG. It returns true if a modification was made, possibly deleting /// the basic block that was pointed to. /// -bool SimplifyCFG(BasicBlock *BB, const DataLayout *TD = 0); +bool SimplifyCFG(BasicBlock *BB, const DataLayout *TD = 0, + const TargetTransformInfo *TTI = 0); /// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch, /// and if a predecessor branches to us and one of our successors, fold the diff --git a/llvm/lib/Target/TargetTransformImpl.cpp b/llvm/lib/Target/TargetTransformImpl.cpp index 27877a9..38c704f 100644 --- a/llvm/lib/Target/TargetTransformImpl.cpp +++ b/llvm/lib/Target/TargetTransformImpl.cpp @@ -49,6 +49,12 @@ unsigned ScalarTargetTransformImpl::getJumpBufSize() const { return TLI->getJumpBufSize(); } +bool ScalarTargetTransformImpl::shouldBuildLookupTables() const { + return TLI->supportJumpTables() && + (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); +} + //===----------------------------------------------------------------------===// // // Calls used by the vectorizers. @@ -313,4 +319,3 @@ VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const { getTypeLegalizationCost(Tp->getContext(), TLI->getValueType(Tp)); return LT.first; } - diff --git a/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp index 74e310f..9cd5381 100644 --- a/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -18,7 +18,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/GlobalVariable.h" #include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" @@ -128,7 +127,6 @@ namespace { bool OptimizeSelectInst(SelectInst *SI); bool DupRetToEnableTailCallOpts(ReturnInst *RI); bool PlaceDbgValues(Function &F); - bool ConvertLoadToSwitch(LoadInst *LI); }; } @@ -1292,11 +1290,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { return OptimizeCmpExpression(CI); if (LoadInst *LI = dyn_cast(I)) { - bool Changed = false; if (TLI) - Changed |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); - Changed |= ConvertLoadToSwitch(LI); - return Changed; + return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); + return false; } if (StoreInst *SI = dyn_cast(I)) { @@ -1376,109 +1372,3 @@ bool CodeGenPrepare::PlaceDbgValues(Function &F) { } return MadeChange; } - -static bool TargetSupportsJumpTables(const TargetLowering &TLI) { - return TLI.supportJumpTables() && - (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); -} - -/// ConvertLoadToSwitch - Convert loads from constant lookup tables into -/// switches. This undos the switch-to-lookup table transformation in -/// SimplifyCFG for targets where that is inprofitable. -bool CodeGenPrepare::ConvertLoadToSwitch(LoadInst *LI) { - // This only applies to targets that don't support jump tables. - if (!TLI || TargetSupportsJumpTables(*TLI)) - return false; - - // FIXME: In the future, it would be desirable to have enough target - // information in SimplifyCFG, so we could decide at that stage whether to - // transform the switch to a lookup table or not, and this - // reverse-transformation could be removed. - - GetElementPtrInst *GEP = dyn_cast(LI->getPointerOperand()); - if (!GEP || !GEP->isInBounds() || GEP->getPointerAddressSpace()) - return false; - if (GEP->getNumIndices() != 2) - return false; - Value *FirstIndex = GEP->idx_begin()[0]; - ConstantInt *FirstIndexInt = dyn_cast(FirstIndex); - if (!FirstIndexInt || !FirstIndexInt->isZero()) - return false; - - Value *TableIndex = GEP->idx_begin()[1]; - IntegerType *TableIndexTy = cast(TableIndex->getType()); - - GlobalVariable *GV = dyn_cast(GEP->getPointerOperand()); - if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) - return false; - - Constant *Arr = GV->getInitializer(); - uint64_t NumElements; - if (ConstantArray *CA = dyn_cast(Arr)) - NumElements = CA->getType()->getNumElements(); - else if (ConstantDataArray *CDA = dyn_cast(Arr)) - NumElements = CDA->getNumElements(); - else - return false; - if (NumElements < 2) - return false; - - // Split the block. - BasicBlock *OriginalBB = LI->getParent(); - BasicBlock *PostSwitchBB = OriginalBB->splitBasicBlock(LI); - - // Replace OriginalBB's terminator with a switch. - IRBuilder<> Builder(OriginalBB->getTerminator()); - SwitchInst *Switch = Builder.CreateSwitch(TableIndex, PostSwitchBB, - NumElements - 1); - OriginalBB->getTerminator()->eraseFromParent(); - - // Count the frequency of each value to decide which to use as default. - SmallDenseMap ValueFreq; - for (uint64_t I = 0; I < NumElements; ++I) - ++ValueFreq[Arr->getAggregateElement(I)]; - uint64_t MaxCount = 0; - Constant *DefaultValue = NULL; - for (SmallDenseMap::iterator I = ValueFreq.begin(), - E = ValueFreq.end(); I != E; ++I) { - if (I->second > MaxCount) { - MaxCount = I->second; - DefaultValue = I->first; - } - } - assert(DefaultValue && "No values in the array?"); - - // Create the phi node in PostSwitchBB, which will replace the load. - Builder.SetInsertPoint(PostSwitchBB->begin()); - PHINode *PHI = Builder.CreatePHI(LI->getType(), NumElements); - PHI->addIncoming(DefaultValue, OriginalBB); - - // Build basic blocks to target with the switch. - for (uint64_t I = 0; I < NumElements; ++I) { - Constant *C = Arr->getAggregateElement(I); - if (C == DefaultValue) continue; // Already covered by the default case. - - BasicBlock *BB = BasicBlock::Create(PostSwitchBB->getContext(), - "lookup.bb", - PostSwitchBB->getParent(), - PostSwitchBB); - Switch->addCase(ConstantInt::get(TableIndexTy, I), BB); - Builder.SetInsertPoint(BB); - Builder.CreateBr(PostSwitchBB); - PHI->addIncoming(C, BB); - } - - // Remove the load. - LI->replaceAllUsesWith(PHI); - LI->eraseFromParent(); - - // Clean up. - if (GEP->use_empty()) - GEP->eraseFromParent(); - if (GV->hasUnnamedAddr() && GV->hasPrivateLinkage() && GV->use_empty()) - GV->eraseFromParent(); - - CurInstIterator = Switch; - return true; -} diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 84b820b..9f24bb6 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -35,6 +35,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/TargetTransformInfo.h" using namespace llvm; STATISTIC(NumSimpl, "Number of blocks simplified"); @@ -293,7 +294,8 @@ static bool mergeEmptyReturnBlocks(Function &F) { /// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. -static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD) { +static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD, + const TargetTransformInfo *TTI) { bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -302,7 +304,7 @@ static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD) { // Loop over all of the basic blocks and remove them if they are unneeded... // for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(BBIt++, TD)) { + if (SimplifyCFG(BBIt++, TD, TTI)) { LocalChange = true; ++NumSimpl; } @@ -317,9 +319,11 @@ static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD) { // bool CFGSimplifyPass::runOnFunction(Function &F) { const DataLayout *TD = getAnalysisIfAvailable(); + const TargetTransformInfo *TTI = + getAnalysisIfAvailable(); bool EverChanged = removeUnreachableBlocksFromFn(F); EverChanged |= mergeEmptyReturnBlocks(F); - EverChanged |= iterativelySimplifyCFG(F, TD); + EverChanged |= iterativelySimplifyCFG(F, TD, TTI); // If neither pass changed anything, we're done. if (!EverChanged) return false; @@ -333,7 +337,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { return true; do { - EverChanged = iterativelySimplifyCFG(F, TD); + EverChanged = iterativelySimplifyCFG(F, TD, TTI); EverChanged |= removeUnreachableBlocksFromFn(F); } while (EverChanged); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index f05a8ba..04ffc90 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" #include "llvm/IRBuilder.h" @@ -39,7 +40,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/NoFolder.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" +#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include #include @@ -82,6 +83,7 @@ namespace { class SimplifyCFGOpt { const DataLayout *const TD; + const TargetTransformInfo *const TTI; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, @@ -101,7 +103,8 @@ class SimplifyCFGOpt { bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: - explicit SimplifyCFGOpt(const DataLayout *td) : TD(td) {} + SimplifyCFGOpt(const DataLayout *td, const TargetTransformInfo *tti) + : TD(td), TTI(tti) {} bool run(BasicBlock *BB); }; } @@ -3459,8 +3462,13 @@ static bool ShouldBuildLookupTable(SwitchInst *SI, /// replace the switch with lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, - const DataLayout* TD) { + const DataLayout* TD, + const TargetTransformInfo *TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); + + if (TTI && !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables()) + return false; + // FIXME: Handle unreachable cases. // FIXME: If the switch is too sparse for a lookup table, perhaps we could @@ -3619,7 +3627,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; - if (SwitchToLookupTable(SI, Builder, TD)) + if (SwitchToLookupTable(SI, Builder, TD, TTI)) return SimplifyCFG(BB) | true; return false; @@ -3916,6 +3924,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// eliminates unreachable basic blocks, and does other "peephole" optimization /// of the CFG. It returns true if a modification was made. /// -bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD) { - return SimplifyCFGOpt(TD).run(BB); +bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD, + const TargetTransformInfo *TTI) { + return SimplifyCFGOpt(TD, TTI).run(BB); } diff --git a/llvm/test/CodeGen/SPARC/load_to_switch.ll b/llvm/test/CodeGen/SPARC/load_to_switch.ll deleted file mode 100644 index 8d62de5..0000000 --- a/llvm/test/CodeGen/SPARC/load_to_switch.ll +++ /dev/null @@ -1,84 +0,0 @@ -; RUN: llc -march=sparc < %s | FileCheck %s - -; Check that all the switches turned into lookup tables by SimplifyCFG are -; turned back into switches for targets that don't like lookup tables. - -@.str = private unnamed_addr constant [4 x i8] c"foo\00", align 1 -@.str1 = private unnamed_addr constant [4 x i8] c"bar\00", align 1 -@.str2 = private unnamed_addr constant [4 x i8] c"baz\00", align 1 -@.str3 = private unnamed_addr constant [4 x i8] c"qux\00", align 1 -@.str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1 -@switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1] -@switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05" -@switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000] -@switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)] - -define i32 @f(i32 %c) { -entry: - %switch.tableidx = sub i32 %c, 42 - %0 = icmp ult i32 %switch.tableidx, 7 - br i1 %0, label %switch.lookup, label %return - -switch.lookup: - %switch.gep = getelementptr inbounds [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx - %switch.load = load i32* %switch.gep - ret i32 %switch.load - -return: - ret i32 15 - -; CHECK: f: -; CHECK: %switch.lookup -; CHECK-NOT: sethi %hi(.Lswitch.table) -} - -declare void @dummy(i8 signext, float) - -define void @h(i32 %x) { -entry: - %switch.tableidx = sub i32 %x, 0 - %0 = icmp ult i32 %switch.tableidx, 4 - br i1 %0, label %switch.lookup, label %sw.epilog - -switch.lookup: - %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx - %switch.load = load i8* %switch.gep - %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx - %switch.load2 = load float* %switch.gep1 - br label %sw.epilog - -sw.epilog: - %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ] - %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ] - call void @dummy(i8 signext %a.0, float %b.0) - ret void - -; CHECK: h: -; CHECK: %switch.lookup -; CHECK-NOT: sethi %hi(.Lswitch.table{{[0-9]}}) -; CHECK-NOT: sethi %hi(.Lswitch.table{{[0-9]}}) -} - -define i8* @foostring(i32 %x) { -entry: - %switch.tableidx = sub i32 %x, 0 - %0 = icmp ult i32 %switch.tableidx, 4 - br i1 %0, label %switch.lookup, label %return - -switch.lookup: - %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx - %switch.load = load i8** %switch.gep - ret i8* %switch.load - -return: - ret i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0) - -; CHECK: foostring: -; CHECK: %switch.lookup -; CHECK-NOT: sethi %hi(.Lswitch.table3) -} - -; CHECK-NOT: .Lswitch.table -; CHECK-NOT: .Lswitch.table1 -; CHECK-NOT: .Lswitch.table2 -; CHECK-NOT: .Lswitch.table3 diff --git a/llvm/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg b/llvm/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg new file mode 100644 index 0000000..786fee9 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'Sparc' in targets: + config.unsupported = True + diff --git a/llvm/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll new file mode 100644 index 0000000..9d15685 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -simplifycfg -S -mtriple=sparc-unknown-unknown | FileCheck %s + +; Check that switches are not turned into lookup tables, as this is not +; considered profitable on the target. + +define i32 @f(i32 %c) nounwind uwtable readnone { +entry: + switch i32 %c, label %sw.default [ + i32 42, label %return + i32 43, label %sw.bb1 + i32 44, label %sw.bb2 + i32 45, label %sw.bb3 + i32 46, label %sw.bb4 + i32 47, label %sw.bb5 + i32 48, label %sw.bb6 + ] + +sw.bb1: br label %return +sw.bb2: br label %return +sw.bb3: br label %return +sw.bb4: br label %return +sw.bb5: br label %return +sw.bb6: br label %return +sw.default: br label %return +return: + %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ] + ret i32 %retval.0 + +; CHECK: @f +; CHECK-NOT: getelementptr +; CHECK: switch i32 %c +} -- 2.7.4