From 84879b71a93fe2059313885311422bcb779702f8 Mon Sep 17 00:00:00 2001
From: Jingyue Wu
Date: Sun, 26 Jul 2015 18:30:13 +0000
Subject: [PATCH] Revert r243250

breaks tests

llvm-svn: 243251
---
 llvm/include/llvm/Analysis/TargetTransformInfo.h   |  9 +--
 .../llvm/Analysis/TargetTransformInfoImpl.h        | 58 +-------------
 llvm/include/llvm/IR/GetElementPtrTypeIterator.h   |  2 +-
 llvm/lib/Analysis/CostModel.cpp                    |  6 +-
 .../Scalar/StraightLineStrengthReduce.cpp          |  1 -
 llvm/test/Analysis/CostModel/ARM/gep.ll            | 88 ++++++----------------
 llvm/test/Analysis/CostModel/no_info.ll            | 22 +-----
 llvm/test/Transforms/LoopUnroll/X86/partial.ll     |  9 +--
 .../LoopVectorize/X86/metadata-enable.ll           |  6 +-
 9 files changed, 43 insertions(+), 158 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index eb6fa9d..01f0089 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -136,8 +136,7 @@ public:
   /// The contract for this function is the same as \c getOperationCost except
   /// that it supports an interface that provides extra information specific to
   /// the GEP operation.
-  unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
-                      ArrayRef<const Value *> Operands) const;
+  unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) const;
 
   /// \brief Estimate the cost of a function call when lowered.
   ///
@@ -544,7 +543,7 @@ public:
   virtual ~Concept() = 0;
   virtual const DataLayout &getDataLayout() const = 0;
   virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
-  virtual unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
+  virtual unsigned getGEPCost(const Value *Ptr,
                               ArrayRef<const Value *> Operands) = 0;
   virtual unsigned getCallCost(FunctionType *FTy, int NumArgs) = 0;
   virtual unsigned getCallCost(const Function *F, int NumArgs) = 0;
@@ -644,9 +643,9 @@ public:
   unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
     return Impl.getOperationCost(Opcode, Ty, OpTy);
   }
-  unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
+  unsigned getGEPCost(const Value *Ptr,
                       ArrayRef<const Value *> Operands) override {
-    return Impl.getGEPCost(PointeeType, Ptr, Operands);
+    return Impl.getGEPCost(Ptr, Operands);
   }
   unsigned getCallCost(FunctionType *FTy, int NumArgs) override {
     return Impl.getCallCost(FTy, NumArgs);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 505126a..035cb04 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -19,7 +19,6 @@
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
 
@@ -93,8 +92,7 @@ public:
     }
   }
 
-  unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
-                      ArrayRef<const Value *> Operands) {
+  unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) {
     // In the basic model, we just assume that all-constant GEPs will be folded
     // into their uses via addressing modes.
     for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
@@ -380,54 +378,6 @@ public:
     return static_cast<T *>(this)->getCallCost(F, Arguments.size());
   }
 
-  using BaseT::getGEPCost;
-
-  unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
-                      ArrayRef<const Value *> Operands) {
-    const GlobalValue *BaseGV = nullptr;
-    if (Ptr != nullptr) {
-      // TODO: will remove this when pointers have an opaque type.
-      assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
-                 PointeeType &&
-             "explicit pointee type doesn't match operand's pointee type");
-      BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
-    }
-    bool HasBaseReg = (BaseGV == nullptr);
-    int64_t BaseOffset = 0;
-    int64_t Scale = 0;
-
-    // Assumes the address space is 0 when Ptr is nullptr.
-    unsigned AS =
-        (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
-    auto GTI = gep_type_begin(PointerType::get(PointeeType, AS), Operands);
-    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
-      if (isa<SequentialType>(*GTI)) {
-        int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
-        if (const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
-          BaseOffset += ConstIdx->getSExtValue() * ElementSize;
-        } else {
-          // Needs scale register.
-          if (Scale != 0) {
-            // No addressing mode takes two scale registers.
-            return TTI::TCC_Basic;
-          }
-          Scale = ElementSize;
-        }
-      } else {
-        StructType *STy = cast<StructType>(*GTI);
-        uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
-        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
-      }
-    }
-
-    if (static_cast<T *>(this)->isLegalAddressingMode(
-            PointerType::get(*GTI, AS), const_cast<GlobalValue *>(BaseGV),
-            BaseOffset, HasBaseReg, Scale, AS)) {
-      return TTI::TCC_Free;
-    }
-    return TTI::TCC_Basic;
-  }
-
   using BaseT::getIntrinsicCost;
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
@@ -447,9 +397,9 @@ public:
       return TTI::TCC_Free; // Model all PHI nodes as free.
 
     if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
-      SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
-      return static_cast<T *>(this)->getGEPCost(
-          GEP->getSourceElementType(), GEP->getPointerOperand(), Indices);
+      SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
+      return static_cast<T *>(this)
+          ->getGEPCost(GEP->getPointerOperand(), Indices);
     }
 
     if (auto CS = ImmutableCallSite(U)) {
diff --git a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
index 7cb13fa..6bba0ae2 100644
--- a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
+++ b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
@@ -78,7 +78,7 @@ namespace llvm {
     // current type directly.
     Type *operator->() const { return operator*(); }
 
-    Value *getOperand() const { return const_cast<Value *>(&**OpIt); }
+    Value *getOperand() const { return *OpIt; }
 
     generic_gep_type_iterator& operator++() {   // Preincrement
       if (CurTy.getInt()) {
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
index da790d7..b529c1a 100644
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -383,8 +383,10 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
     return -1;
 
   switch (I->getOpcode()) {
-  case Instruction::GetElementPtr:
-    return TTI->getUserCost(I);
+  case Instruction::GetElementPtr: {
+    Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
+    return TTI->getAddressComputationCost(ValTy);
+  }
 
   case Instruction::Ret:
   case Instruction::PHI:
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index b2b886c..6d9d417 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -234,7 +234,6 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
           Basis.CandidateKind == C.CandidateKind);
 }
 
-// TODO: use TTI->getGEPCost.
 static bool isGEPFoldable(GetElementPtrInst *GEP,
                           const TargetTransformInfo *TTI,
                           const DataLayout *DL) {
diff --git a/llvm/test/Analysis/CostModel/ARM/gep.ll b/llvm/test/Analysis/CostModel/ARM/gep.ll
index a70d6d4..624ca11 100644
--- a/llvm/test/Analysis/CostModel/ARM/gep.ll
+++ b/llvm/test/Analysis/CostModel/ARM/gep.ll
@@ -3,85 +3,41 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios6.0.0"
 
-define void @test_geps(i32 %i) {
-  ; GEPs with index 0 are essentially NOOPs.
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+define void @test_geps() {
+  ; Cost of scalar integer geps should be one. We can't always expect it to be
+  ; folded into the instruction addressing mode.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
   %a0 = getelementptr inbounds i8, i8* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
   %a1 = getelementptr inbounds i16, i16* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
   %a2 = getelementptr inbounds i32, i32* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
+
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
   %a3 = getelementptr inbounds i64, i64* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
+
+  ; Cost of scalar floating point geps should be one. We cannot fold the address
+  ; computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
   %a4 = getelementptr inbounds float, float* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
   %a5 = getelementptr inbounds double, double* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+
+
+  ; Cost of vector geps should be one. We cannot fold the address computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
   %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
   %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
   %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
   %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
   %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
   %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
 
-  ; Cost of GEPs is one if we cannot fold the address computation.
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
-  %b0 = getelementptr inbounds i8, i8* undef, i32 1024
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
-  %b1 = getelementptr inbounds i16, i16* undef, i32 1024
-  ; Thumb-2 cannot fold offset >= 2^12 into address computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
-  %b2 = getelementptr inbounds i32, i32* undef, i32 1024
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
-  %b3 = getelementptr inbounds i64, i64* undef, i32 1024
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
-  %b4 = getelementptr inbounds float, float* undef, i32 1024
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
-  %b5 = getelementptr inbounds double, double* undef, i32 1024
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
-  %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
-  %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
-  %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
-  %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
-  %b11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
-  %b12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
-
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
-  %c0 = getelementptr inbounds i8, i8* undef, i32 %i
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
-  %c1 = getelementptr inbounds i16, i16* undef, i32 %i
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
-  %c2 = getelementptr inbounds i32, i32* undef, i32 %i
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
-  %c3 = getelementptr inbounds i64, i64* undef, i32 %i
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
-  %c4 = getelementptr inbounds float, float* undef, i32 %i
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
-  %c5 = getelementptr inbounds double, double* undef, i32 %i
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
-  %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
-  %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
-  ; Thumb-2 cannot fold scales larger than 8 to address computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
-  %c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
-  %c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
-  %c11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
-  %c12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
 
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/no_info.ll b/llvm/test/Analysis/CostModel/no_info.ll
index 931669b..5f3b56a 100644
--- a/llvm/test/Analysis/CostModel/no_info.ll
+++ b/llvm/test/Analysis/CostModel/no_info.ll
@@ -5,27 +5,9 @@
 
 ; -- No triple in this module --
 
-; CHECK-LABEL: function 'no_info'
-; CHECK: cost of 1 {{.*}} add
-; CHECK: cost of 1 {{.*}} ret
+;CHECK: cost of 1 {{.*}} add
+;CHECK: cost of 1 {{.*}} ret
 define i32 @no_info(i32 %arg) {
   %e = add i32 %arg, %arg
   ret i32 %e
 }
-
-define i8 @addressing_mode_reg_reg(i8* %a, i32 %b) {
-; CHECK-LABEL: function 'addressing_mode_reg_reg'
-  %p = getelementptr i8, i8* %a, i32 %b  ; NoTTI accepts reg+reg addressing.
-; CHECK: cost of 0 {{.*}} getelementptr
-  %v = load i8, i8* %p
-  ret i8 %v
-}
-
-; CHECK-LABEL: function 'addressing_mode_scaled_reg'
-define i32 @addressing_mode_scaled_reg(i32* %a, i32 %b) {
-  ; NoTTI rejects reg+scale*reg addressing.
-  %p = getelementptr i32, i32* %a, i32 %b
-; CHECK: cost of 1 {{.*}} getelementptr
-  %v = load i32, i32* %p
-  ret i32 %v
-}
diff --git a/llvm/test/Transforms/LoopUnroll/X86/partial.ll b/llvm/test/Transforms/LoopUnroll/X86/partial.ll
index 104a387..4566f79 100644
--- a/llvm/test/Transforms/LoopUnroll/X86/partial.ll
+++ b/llvm/test/Transforms/LoopUnroll/X86/partial.ll
@@ -86,20 +86,17 @@ for.body:                                         ; preds = %entry, %for.body
   %reduction.026 = phi i16 [ %add14, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i16, i16* %arr, i64 %indvars.iv
   %0 = load i16, i16* %arrayidx, align 2
-  %mul = shl i16 %0, 1
-  %add = add i16 %mul, %reduction.026
+  %add = add i16 %0, %reduction.026
   %sext = mul i64 %indvars.iv, 12884901888
   %idxprom3 = ashr exact i64 %sext, 32
   %arrayidx4 = getelementptr inbounds i16, i16* %arr, i64 %idxprom3
   %1 = load i16, i16* %arrayidx4, align 2
-  %mul2 = shl i16 %1, 1
-  %add7 = add i16 %add, %mul2
+  %add7 = add i16 %add, %1
   %sext28 = mul i64 %indvars.iv, 21474836480
   %idxprom10 = ashr exact i64 %sext28, 32
   %arrayidx11 = getelementptr inbounds i16, i16* %arr, i64 %idxprom10
   %2 = load i16, i16* %arrayidx11, align 2
-  %mul3 = shl i16 %2, 1
-  %add14 = add i16 %add7, %mul3
+  %add14 = add i16 %add7, %2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
diff --git a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index 74c0c16..ba8e11e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -60,7 +60,7 @@ for.body:                                         ; preds = %for.body, %entry
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %add, i32* %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  %exitcond = icmp eq i64 %indvars.iv.next, 32
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
 
 for.end:                                          ; preds = %for.body
@@ -111,7 +111,7 @@ for.body:                                         ; preds = %for.body, %entry
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %add, i32* %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  %exitcond = icmp eq i64 %indvars.iv.next, 32
   br i1 %exitcond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.body
@@ -162,7 +162,7 @@ for.body:                                         ; preds = %for.body, %entry
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %add, i32* %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  %exitcond = icmp eq i64 %indvars.iv.next, 32
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
 
 for.end:                                          ; preds = %for.body
-- 
2.7.4