From a6b91ac30726cb5f5716e85fe11fed1d3b9d8b8f Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 2 Nov 2012 21:48:17 +0000 Subject: [PATCH] Add a cost model analysis that allows us to estimate the cost of IR-level instructions. llvm-svn: 167324 --- llvm/include/llvm/Analysis/Passes.h | 7 + llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 1 + llvm/lib/Analysis/Analysis.cpp | 1 + llvm/lib/Analysis/CMakeLists.txt | 1 + llvm/lib/Analysis/CostModel.cpp | 175 +++++++++++++++++++++ llvm/test/Analysis/CostModel/X86/lit.local.cfg | 6 + llvm/test/Analysis/CostModel/X86/tiny.ll | 11 ++ .../test/Analysis/CostModel/X86/vectorized-loop.ll | 76 +++++++++ llvm/test/Analysis/CostModel/lit.local.cfg | 1 + llvm/test/Analysis/CostModel/no_info.ll | 15 ++ 11 files changed, 295 insertions(+) create mode 100644 llvm/lib/Analysis/CostModel.cpp create mode 100644 llvm/test/Analysis/CostModel/X86/lit.local.cfg create mode 100644 llvm/test/Analysis/CostModel/X86/tiny.ll create mode 100644 llvm/test/Analysis/CostModel/X86/vectorized-loop.ll create mode 100644 llvm/test/Analysis/CostModel/lit.local.cfg create mode 100644 llvm/test/Analysis/CostModel/no_info.ll diff --git a/llvm/include/llvm/Analysis/Passes.h b/llvm/include/llvm/Analysis/Passes.h index ff1008b..27726f4 100644 --- a/llvm/include/llvm/Analysis/Passes.h +++ b/llvm/include/llvm/Analysis/Passes.h @@ -187,6 +187,13 @@ namespace llvm { //===--------------------------------------------------------------------===// // + // createCostModelAnalysisPass - This creates an instance of the + // CostModelAnalysis pass. + // + FunctionPass *createCostModelAnalysisPass(); + + //===--------------------------------------------------------------------===// + // // Minor pass prototypes, allowing us to expose them through bugpoint and // analyze. FunctionPass *createInstCountPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 9b8a6bd..8c164eb 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -88,6 +88,7 @@ void initializeCodePlacementOptPass(PassRegistry&); void initializeConstantMergePass(PassRegistry&); void initializeConstantPropagationPass(PassRegistry&); void initializeMachineCopyPropagationPass(PassRegistry&); +void initializeCostModelAnalysisPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); void initializeDAEPass(PassRegistry&); void initializeDAHPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index a6d2f291..806e4b3 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -60,6 +60,7 @@ namespace { (void) llvm::createCFGSimplificationPass(); (void) llvm::createConstantMergePass(); (void) llvm::createConstantPropagationPass(); + (void) llvm::createCostModelAnalysisPass(); (void) llvm::createDeadArgEliminationPass(); (void) llvm::createDeadCodeEliminationPass(); (void) llvm::createDeadInstEliminationPass(); diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp index 8969ea9..9dc81a6 100644 --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -26,6 +26,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeBasicAliasAnalysisPass(Registry); initializeBlockFrequencyInfoPass(Registry); initializeBranchProbabilityInfoPass(Registry); + initializeCostModelAnalysisPass(Registry); initializeCFGViewerPass(Registry); initializeCFGPrinterPass(Registry); initializeCFGOnlyViewerPass(Registry); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 8f5a2f28..b3a40be 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -10,6 +10,7 @@ add_llvm_library(LLVMAnalysis BranchProbabilityInfo.cpp CFGPrinter.cpp CaptureTracking.cpp + CostModel.cpp CodeMetrics.cpp ConstantFolding.cpp DbgInfoPrinter.cpp diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp new file mode 100644 index 0000000..142d287 --- /dev/null +++ b/llvm/lib/Analysis/CostModel.cpp @@ -0,0 +1,175 @@ +//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the cost model analysis. It provides a very basic cost +// estimation for LLVM-IR. The cost result can be thought of as cycles, but it +// is really unit-less. The estimated cost is ment to be used for comparing +// alternatives. +// +//===----------------------------------------------------------------------===// + +#define CM_NAME "cost-model" +#define DEBUG_TYPE CM_NAME +#include "llvm/Analysis/Passes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/TargetTransformInfo.h" +#include "llvm/Value.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + class CostModelAnalysis : public FunctionPass { + + public: + static char ID; // Class identification, replacement for typeinfo + CostModelAnalysis() : FunctionPass(ID), F(0), VTTI(0) { + initializeCostModelAnalysisPass( + *PassRegistry::getPassRegistry()); + } + + /// Returns the expected cost of the instruction. + /// Returns -1 if the cost is unknown. + /// Note, this method does not cache the cost calculation and it + /// can be expensive in some cases. + unsigned getInstructionCost(Instruction *I) const; + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); + virtual void print(raw_ostream &OS, const Module*) const; + + /// The function that we analyze. + Function *F; + /// Vector target information. + const VectorTargetTransformInfo *VTTI; + }; +} // End of anonymous namespace + +// Register this pass. +char CostModelAnalysis::ID = 0; +static const char cm_name[] = "Cost Model Analysis"; +INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true) +INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true) + +FunctionPass *llvm::createCostModelAnalysisPass() { + return new CostModelAnalysis(); +} + +void +CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} + +bool +CostModelAnalysis::runOnFunction(Function &F) { + this->F = &F; + + // Target information. + TargetTransformInfo *TTI; + TTI = getAnalysisIfAvailable(); + if (TTI) + VTTI = TTI->getVectorTargetTransformInfo(); + + return false; +} + +unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { + if (!VTTI) + return -1; + + switch (I->getOpcode()) { + case Instruction::Ret: + case Instruction::PHI: + case Instruction::Br: { + return VTTI->getCFInstrCost(I->getOpcode()); + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + return VTTI->getArithmeticInstrCost(I->getOpcode(), I->getType()); + } + case Instruction::Select: { + SelectInst *SI = cast(I); + Type *CondTy = SI->getCondition()->getType(); + return VTTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); + } + case Instruction::ICmp: + case Instruction::FCmp: { + Type *ValTy = I->getOperand(0)->getType(); + return VTTI->getCmpSelInstrCost(I->getOpcode(), ValTy); + } + case Instruction::Store: { + StoreInst *SI = cast(I); + Type *ValTy = SI->getValueOperand()->getType(); + return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, + SI->getAlignment(), + SI->getPointerAddressSpace()); + } + case Instruction::Load: { + LoadInst *LI = cast(I); + return VTTI->getMemoryOpCost(I->getOpcode(), I->getType(), + LI->getAlignment(), + LI->getPointerAddressSpace()); + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + Type *SrcTy = I->getOperand(0)->getType(); + return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); + } + default: + // We don't have any information on this instruction. + return -1; + } +} + +void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { + if (!F) + return; + + for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) { + for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) { + Instruction *Inst = it; + unsigned Cost = getInstructionCost(Inst); + if (Cost != (unsigned)-1) + OS << "Cost Model: Found an estimated cost of " << Cost; + else + OS << "Cost Model: Unknown cost"; + + OS << " for instruction: "<< *Inst << "\n"; + } + } +} diff --git a/llvm/test/Analysis/CostModel/X86/lit.local.cfg b/llvm/test/Analysis/CostModel/X86/lit.local.cfg new file mode 100644 index 0000000..a8ad0f1 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True + diff --git a/llvm/test/Analysis/CostModel/X86/tiny.ll b/llvm/test/Analysis/CostModel/X86/tiny.ll new file mode 100644 index 0000000..cc7b443 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/tiny.ll @@ -0,0 +1,11 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: cost of 1 {{.*}} add +;CHECK: cost of 1 {{.*}} ret +define i32 @no_info(i32 %arg) { + %e = add i32 %arg, %arg + ret i32 %e +} diff --git a/llvm/test/Analysis/CostModel/X86/vectorized-loop.ll b/llvm/test/Analysis/CostModel/X86/vectorized-loop.ll new file mode 100644 index 0000000..fbf20de --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/vectorized-loop.ll @@ -0,0 +1,76 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +define i32 @foo(i32* noalias nocapture %A, i32* noalias nocapture %B, i32 %start, i32 %end) nounwind uwtable ssp { +entry: + ;CHECK: cost of 1 {{.*}} icmp + %cmp7 = icmp slt i32 %start, %end + br i1 %cmp7, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + ;CHECK: cost of 1 {{.*}} sext + %0 = sext i32 %start to i64 + %1 = sub i32 %end, %start + %2 = zext i32 %1 to i64 + %end.idx = add i64 %2, %0 + ;CHECK: cost of 1 {{.*}} add + %n.vec = and i64 %2, 4294967288 + %end.idx.rnd.down = add i64 %n.vec, %0 + ;CHECK: cost of 1 {{.*}} icmp + %cmp.zero = icmp eq i64 %n.vec, 0 + br i1 %cmp.zero, label %middle.block, label %vector.body + +vector.body: ; preds = %for.body.lr.ph, %vector.body + %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body.lr.ph ] + %3 = add i64 %index, 2 + %4 = getelementptr inbounds i32* %B, i64 %3 + ;CHECK: cost of 0 {{.*}} bitcast + %5 = bitcast i32* %4 to <8 x i32>* + ;CHECK: cost of 1 {{.*}} load + %6 = load <8 x i32>* %5, align 4 + %7 = mul nsw <8 x i32> %6, + %8 = getelementptr inbounds i32* %A, i64 %index + %9 = bitcast i32* %8 to <8 x i32>* + %10 = load <8 x i32>* %9, align 4 + %11 = add nsw <8 x i32> %10, %7 + ;CHECK: cost of 1 {{.*}} store + store <8 x i32> %11, <8 x i32>* %9, align 4 + %index.next = add i64 %index, 8 + %12 = icmp eq i64 %index.next, %end.idx.rnd.down + ;CHECK: cost of 1 {{.*}} br + br i1 %12, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body, %for.body.lr.ph + %cmp.n = icmp eq i64 %end.idx, %end.idx.rnd.down + br i1 %cmp.n, label %for.end, label %for.body + +for.body: ; preds = %middle.block, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %end.idx.rnd.down, %middle.block ] + %13 = add nsw i64 %indvars.iv, 2 + %arrayidx = getelementptr inbounds i32* %B, i64 %13 + ;CHECK: cost of 1 {{.*}} load + %14 = load i32* %arrayidx, align 4, !tbaa !0 + ;CHECK: cost of 1 {{.*}} mul + %mul = mul nsw i32 %14, 5 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv + ;CHECK: cost of 1 {{.*}} load + %15 = load i32* %arrayidx2, align 4, !tbaa !0 + %add3 = add nsw i32 %15, %mul + store i32 %add3, i32* %arrayidx2, align 4, !tbaa !0 + %indvars.iv.next = add i64 %indvars.iv, 1 + ;CHECK: cost of 0 {{.*}} trunc + %16 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %16, %end + ;CHECK: cost of 1 {{.*}} br + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %middle.block, %for.body, %entry + ;CHECK: cost of 1 {{.*}} ret + ret i32 undef +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/llvm/test/Analysis/CostModel/lit.local.cfg b/llvm/test/Analysis/CostModel/lit.local.cfg new file mode 100644 index 0000000..19eebc0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/llvm/test/Analysis/CostModel/no_info.ll b/llvm/test/Analysis/CostModel/no_info.ll new file mode 100644 index 0000000..d20d56b --- /dev/null +++ b/llvm/test/Analysis/CostModel/no_info.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -cost-model -analyze | FileCheck %s + +; The cost model does not have any target information so it can't make a decision. +; Notice that OPT does not read the triple information from the module itself, only through the command line. + +; This info ignored: +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: Unknown cost {{.*}} add +;CHECK: Unknown cost {{.*}} ret +define i32 @no_info(i32 %arg) { + %e = add i32 %arg, %arg + ret i32 %e +} -- 2.7.4