From fa8a9fea47e08f8d26e5796aed02e05668df1e80 Mon Sep 17 00:00:00 2001 From: Vasileios Porpodas Date: Tue, 26 Apr 2022 11:32:24 -0700 Subject: [PATCH] Recommit "[SLP][TTI] Refactoring of `getShuffleCost` `Args` to work like `getArithmeticInstrCost`" This reverts commit 6a9bbd9f20dcd700e28738788bb63a160c6c088c. Code review: https://reviews.llvm.org/D124202 --- llvm/include/llvm/Analysis/TargetTransformInfo.h | 6 ++-- .../llvm/Analysis/TargetTransformInfoImpl.h | 27 +++++++++------ llvm/include/llvm/CodeGen/BasicTTIImpl.h | 2 +- llvm/lib/Analysis/TargetTransformInfo.cpp | 2 +- .../Target/AArch64/AArch64TargetTransformInfo.cpp | 6 ++-- .../Target/AArch64/AArch64TargetTransformInfo.h | 2 +- .../Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 2 +- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 2 +- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 2 +- .../Target/Hexagon/HexagonTargetTransformInfo.cpp | 2 +- .../Target/Hexagon/HexagonTargetTransformInfo.h | 2 +- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 2 +- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 2 +- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 2 +- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 2 +- .../Target/SystemZ/SystemZTargetTransformInfo.cpp | 2 +- .../Target/SystemZ/SystemZTargetTransformInfo.h | 2 +- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 7 ++-- llvm/lib/Target/X86/X86TargetTransformInfo.h | 2 +- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- .../Analysis/CostModel/AArch64/shuffle-load.ll | 38 +++++++++++----------- llvm/test/Analysis/CostModel/X86/splat-load.ll | 2 +- 23 files changed, 62 insertions(+), 58 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 8ac6485..19a6b14 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1055,7 +1055,7 @@ public: InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef Mask = None, int Index = 0, VectorType *SubTp = nullptr, - ArrayRef Args = None) const; + ArrayRef Args = None) const; /// Represents a hint about the context in which a cast is used. /// @@ -1672,7 +1672,7 @@ public: virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args) = 0; + ArrayRef Args) = 0; virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, @@ -2199,7 +2199,7 @@ public: InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args) override { + ArrayRef Args) override { return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp, Args); } InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index ff73e62..9111b74 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -493,7 +493,7 @@ public: InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args = None) const { + ArrayRef Args = None) const { return 1; } @@ -1147,13 +1147,14 @@ public: if (Shuffle->isExtractSubvectorMask(SubIndex)) return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, Shuffle->getShuffleMask(), SubIndex, - VecTy); + VecTy, Operands); if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) return TargetTTI->getShuffleCost( TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex, - FixedVectorType::get(VecTy->getScalarType(), NumSubElts)); + FixedVectorType::get(VecTy->getScalarType(), NumSubElts), + Operands); int ReplicationFactor, VF; if (Shuffle->isReplicationMask(ReplicationFactor, VF)) { @@ -1176,31 +1177,37 @@ public: if (Shuffle->isReverse()) return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, - Shuffle->getShuffleMask(), 0, nullptr); + Shuffle->getShuffleMask(), 0, nullptr, + Operands); if (Shuffle->isSelect()) return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, - Shuffle->getShuffleMask(), 0, nullptr); + Shuffle->getShuffleMask(), 0, nullptr, + Operands); if (Shuffle->isTranspose()) return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, - Shuffle->getShuffleMask(), 0, nullptr); + Shuffle->getShuffleMask(), 0, nullptr, + Operands); if (Shuffle->isZeroEltSplat()) return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, - Shuffle->getShuffleMask(), 0, nullptr); + Shuffle->getShuffleMask(), 0, nullptr, + Operands); if (Shuffle->isSingleSource()) return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, - Shuffle->getShuffleMask(), 0, nullptr); + Shuffle->getShuffleMask(), 0, nullptr, + Operands); if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) return TargetTTI->getShuffleCost( TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex, - FixedVectorType::get(VecTy->getScalarType(), NumSubElts)); + FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands); return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, - Shuffle->getShuffleMask(), 0, nullptr); + Shuffle->getShuffleMask(), 0, nullptr, + Operands); } case Instruction::ExtractElement: { auto *EEI = dyn_cast(U); diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index d3b2272..83e9fa0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -872,7 +872,7 @@ public: InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args = None) { + ArrayRef Args = None) { switch (improveShuffleKindFromMask(Kind, Mask)) { case TTI::SK_Broadcast: diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 8a46569..df3d332 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -747,7 +747,7 @@ InstructionCost TargetTransformInfo::getArithmeticInstrCost( InstructionCost TargetTransformInfo::getShuffleCost( ShuffleKind Kind, VectorType *Ty, ArrayRef Mask, int Index, - VectorType *SubTp, ArrayRef Args) const { + VectorType *SubTp, ArrayRef Args) const { InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp, Args); assert(Cost >= 0 && "TTI should not produce negative costs!"); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 3383296..65f9dbe 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2594,7 +2594,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args) { + ArrayRef Args) { Kind = improveShuffleKindFromMask(Kind, Mask); std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose || @@ -2603,9 +2603,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // Check for broadcast loads. if (Kind == TTI::SK_Broadcast) { - bool IsLoad = !Args.empty() && llvm::all_of(Args, [](const Value *V) { - return isa(V); - }); + bool IsLoad = !Args.empty() && isa(Args[0]); if (IsLoad && LT.second.isVector() && isLegalBroadcastLoad(Tp->getElementType(), LT.second.getVectorElementCount())) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index c68676d..679ee5f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -348,7 +348,7 @@ public: InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args = None); + ArrayRef Args = None); /// @} }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index b61b9c7..a79cd2e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1040,7 +1040,7 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *VT, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args) { + ArrayRef Args) { Kind = improveShuffleKindFromMask(Kind, Mask); if (ST->hasVOP3PInsts()) { if (cast(VT)->getNumElements() == 2 && diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index c0249fb..f2260c3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -198,7 +198,7 @@ public: InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args = None); + ArrayRef Args = None); bool areInlineCompatible(const Function *Caller, const Function *Callee) const; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index dcaec66..9d376ad 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1203,7 +1203,7 @@ InstructionCost ARMTTIImpl::getMemcpyCost(const Instruction *I) { InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args) { + ArrayRef Args) { Kind = improveShuffleKindFromMask(Kind, Mask); if (ST->hasNEON()) { if (Kind == TTI::SK_Broadcast) { diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 3139c41..d7a2bdb 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -214,7 +214,7 @@ public: InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args = None); + ArrayRef Args = None); bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const; diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 280d8f1..bb0aaa3 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -224,7 +224,7 @@ HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef Mask, int Index, Type *SubTp, - ArrayRef Args) { + ArrayRef Args) { return 1; } diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 65eb9d9..1624552 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -126,7 +126,7 @@ public: TTI::TargetCostKind CostKind); InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef Mask, int Index, Type *SubTp, - ArrayRef Args = None); + ArrayRef Args = None); InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 1ee960f..501cfa0 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1016,7 +1016,7 @@ InstructionCost PPCTTIImpl::getArithmeticInstrCost( InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef Mask, int Index, Type *SubTp, - ArrayRef Args) { + ArrayRef Args) { InstructionCost CostFactor = vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr); diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 61cb689..4c00055 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -112,7 +112,7 @@ public: const Instruction *CxtI = nullptr); InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef Mask, int Index, Type *SubTp, - ArrayRef Args = None); + ArrayRef Args = None); InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 951635c..ced34d1 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -176,7 +176,7 @@ InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index) { InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args) { + ArrayRef Args) { if (Kind == TTI::SK_Splice && isa(Tp)) return getSpliceCost(Tp, Index); return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 60622bf..a0a5110 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -81,7 +81,7 @@ public: InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args = None); + ArrayRef Args = None); InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 3eb959a..b442892 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -596,7 +596,7 @@ InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args) { + ArrayRef Args) { Kind = improveShuffleKindFromMask(Kind, Mask); if (ST->hasVector()) { unsigned NumVectors = getNumVectorRegs(Tp); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index fc39e09..190f771 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -94,7 +94,7 @@ public: InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args = None); + ArrayRef Args = None); unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy); unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy); unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 1079bd6..b018843 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1086,7 +1086,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args) { + ArrayRef Args) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are widened to type v4i32. std::pair LT = TLI->getTypeLegalizationCost(DL, BaseTp); @@ -1551,9 +1551,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, }; if (ST->hasSSE2()) { - bool IsLoad = !Args.empty() && llvm::all_of(Args, [](const Value *V) { - return isa(V); - }); + bool IsLoad = + llvm::any_of(Args, [](const auto &V) { return isa(V); }); if (ST->hasSSE3() && IsLoad) if (const auto *Entry = CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) { diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 3dbc2d0..4733503 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -137,7 +137,7 @@ public: InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp, - ArrayRef Args = None); + ArrayRef Args = None); InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 74eb3f2..ee0b858 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5328,7 +5328,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, "No reused scalars expected for broadcast."); return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, /*Mask=*/None, /*Index=*/0, - /*SubTp=*/nullptr, /*Args=*/VL); + /*SubTp=*/nullptr, /*Args=*/VL[0]); } InstructionCost ReuseShuffleCost = 0; if (NeedToShuffleReuses) diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll index bc882b1..10ffb99 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll @@ -13,45 +13,45 @@ define void @shuffle() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv4i8 = load <4 x i8>, ptr undef, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv4i8 = shufflevector <4 x i8> %lv4i8, <4 x i8> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv8i8 = load <8 x i8>, ptr undef, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv8i8 = shufflevector <8 x i8> %lv8i8, <8 x i8> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv8i8 = shufflevector <8 x i8> %lv8i8, <8 x i8> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv16i8 = load <16 x i8>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv16i8 = shufflevector <16 x i8> %lv16i8, <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv16i8 = shufflevector <16 x i8> %lv16i8, <16 x i8> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %lv2i16 = load <2 x i16>, ptr undef, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2i16 = shufflevector <2 x i16> %lv2i16, <2 x i16> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4i16 = load <4 x i16>, ptr undef, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv4i16 = shufflevector <4 x i16> %lv4i16, <4 x i16> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv4i16 = shufflevector <4 x i16> %lv4i16, <4 x i16> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv8i16 = load <8 x i16>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv8i16 = shufflevector <8 x i16> %lv8i16, <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv8i16 = shufflevector <8 x i16> %lv8i16, <8 x i16> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv16i16 = load <16 x i16>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sv16i16 = shufflevector <16 x i16> %lv16i16, <16 x i16> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv16i16 = shufflevector <16 x i16> %lv16i16, <16 x i16> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2i32 = load <2 x i32>, ptr undef, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2i32 = shufflevector <2 x i32> %lv2i32, <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv2i32 = shufflevector <2 x i32> %lv2i32, <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4i32 = load <4 x i32>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv4i32 = shufflevector <4 x i32> %lv4i32, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv4i32 = shufflevector <4 x i32> %lv4i32, <4 x i32> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv8i32 = load <8 x i32>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sv8i32 = shufflevector <8 x i32> %lv8i32, <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv8i32 = shufflevector <8 x i32> %lv8i32, <8 x i32> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2i64 = load <2 x i64>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2i64 = shufflevector <2 x i64> %lv2i64, <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv2i64 = shufflevector <2 x i64> %lv2i64, <2 x i64> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv4i64 = load <4 x i64>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sv4i64 = shufflevector <4 x i64> %lv4i64, <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv4i64 = shufflevector <4 x i64> %lv4i64, <4 x i64> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2f16 = load <2 x half>, ptr undef, align 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sv2f16 = shufflevector <2 x half> %lv2f16, <2 x half> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv2f16 = shufflevector <2 x half> %lv2f16, <2 x half> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4f16 = load <4 x half>, ptr undef, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %sv4f16 = shufflevector <4 x half> %lv4f16, <4 x half> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv4f16 = shufflevector <4 x half> %lv4f16, <4 x half> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv8f16 = load <8 x half>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %sv8f16 = shufflevector <8 x half> %lv8f16, <8 x half> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv8f16 = shufflevector <8 x half> %lv8f16, <8 x half> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv16f16 = load <16 x half>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %sv16f16 = shufflevector <16 x half> %lv16f16, <16 x half> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv16f16 = shufflevector <16 x half> %lv16f16, <16 x half> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2f32 = load <2 x float>, ptr undef, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2f32 = shufflevector <2 x float> %lv2f32, <2 x float> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv2f32 = shufflevector <2 x float> %lv2f32, <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4f32 = load <4 x float>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv4f32 = shufflevector <4 x float> %lv4f32, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv4f32 = shufflevector <4 x float> %lv4f32, <4 x float> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv8f32 = load <8 x float>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sv8f32 = shufflevector <8 x float> %lv8f32, <8 x float> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv8f32 = shufflevector <8 x float> %lv8f32, <8 x float> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2f64 = load <2 x double>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2f64 = shufflevector <2 x double> %lv2f64, <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv2f64 = shufflevector <2 x double> %lv2f64, <2 x double> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv4f64 = load <4 x double>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sv4f64 = shufflevector <4 x double> %lv4f64, <4 x double> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv4f64 = shufflevector <4 x double> %lv4f64, <4 x double> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %lv2i8 = load <2 x i8>, ptr undef diff --git a/llvm/test/Analysis/CostModel/X86/splat-load.ll b/llvm/test/Analysis/CostModel/X86/splat-load.ll index 0d9fd437..a0a5757 100644 --- a/llvm/test/Analysis/CostModel/X86/splat-load.ll +++ b/llvm/test/Analysis/CostModel/X86/splat-load.ll @@ -26,7 +26,7 @@ define void @splat_load_2xdouble(<2 x double> *%ptr) { ; ; SSE3-LABEL: 'splat_load_2xdouble' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, <2 x double>* %ptr, align 16 -; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %splat_load = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %load = load <2 x double>, <2 x double> *%ptr -- 2.7.4