From fccc7d66c3baf6e80a80b40bf7af640b500ef112 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 12 Apr 2017 11:49:08 +0000 Subject: [PATCH] [SystemZ] TargetTransformInfo cost functions implemented. getArithmeticInstrCost(), getShuffleCost(), getCastInstrCost(), getCmpSelInstrCost(), getVectorInstrCost(), getMemoryOpCost(), getInterleavedMemoryOpCost() implemented. Interleaved access vectorization enabled. BasicTTIImpl::getCastInstrCost() improved to check for legal extending loads, in which case the cost of the z/sext instruction becomes 0. Review: Ulrich Weigand, Renato Golin. https://reviews.llvm.org/D29631 llvm-svn: 300052 --- llvm/include/llvm/Analysis/TargetTransformInfo.h | 35 +- .../llvm/Analysis/TargetTransformInfoImpl.h | 8 +- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 60 +- llvm/lib/Analysis/CostModel.cpp | 14 +- llvm/lib/Analysis/TargetTransformInfo.cpp | 19 +- .../Target/AArch64/AArch64TargetTransformInfo.cpp | 10 +- .../Target/AArch64/AArch64TargetTransformInfo.h | 8 +- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 10 +- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 8 +- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 10 +- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 8 +- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 4 + .../Target/SystemZ/SystemZTargetTransformInfo.cpp | 549 ++++- .../Target/SystemZ/SystemZTargetTransformInfo.h | 27 + llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 10 +- llvm/lib/Target/X86/X86TargetTransformInfo.h | 8 +- .../Transforms/Scalar/RewriteStatepointsForGC.cpp | 2 +- llvm/lib/Transforms/Vectorize/BBVectorize.cpp | 17 +- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 +- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 16 +- llvm/test/Analysis/CostModel/SystemZ/cmp-ext.ll | 2403 ++++++++++++++++++++ llvm/test/Analysis/CostModel/SystemZ/cmpsel.ll | 1987 ++++++++++++++++ llvm/test/Analysis/CostModel/SystemZ/ext-load.ll | 56 + llvm/test/Analysis/CostModel/SystemZ/fp-arith.ll | 119 + 
llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll | 541 +++++ llvm/test/Analysis/CostModel/SystemZ/int-arith.ll | 326 +++ llvm/test/Analysis/CostModel/SystemZ/int-cast.ll | 199 ++ llvm/test/Analysis/CostModel/SystemZ/load_store.ll | 137 ++ llvm/test/Analysis/CostModel/SystemZ/logical.ll | 277 +++ .../CostModel/SystemZ/memop-folding-int-arith.ll | 259 +++ .../CostModel/SystemZ/scalar-cmp-cmp-log-sel.ll | 1624 +++++++++++++ llvm/test/Analysis/CostModel/SystemZ/shuffle.ll | 112 + .../Analysis/CostModel/SystemZ/vectorinstrs.ll | 56 + .../SystemZ/mem-interleaving-costs.ll | 70 + 34 files changed, 8899 insertions(+), 104 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/SystemZ/cmp-ext.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/cmpsel.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/ext-load.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/fp-arith.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/int-arith.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/int-cast.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/load_store.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/logical.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/scalar-cmp-cmp-log-sel.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/shuffle.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/vectorinstrs.ll create mode 100644 llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 8eb9ebc..3d92208 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -572,8 +572,10 @@ public: Type *SubTp = nullptr) const; /// \return The expected cost of cast instructions, 
such as bitcast, trunc, - /// zext, etc. - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; + /// zext, etc. If there is an existing instruction that holds Opcode, it + /// may be passed in the 'I' parameter. + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr) const; /// \return The expected cost of a sign- or zero-extended vector extract. Use /// -1 to indicate that there is no information about the index value. @@ -584,9 +586,11 @@ public: /// Phi, Ret, Br. int getCFInstrCost(unsigned Opcode) const; - /// \returns The expected cost of compare and select instructions. + /// \returns The expected cost of compare and select instructions. If there + /// is an existing instruction that holds Opcode, it may be passed in the + /// 'I' parameter. int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy = nullptr) const; + Type *CondTy = nullptr, const Instruction *I = nullptr) const; /// \return The expected cost of vector Insert and Extract. /// Use -1 to indicate that there is no information on the index value. @@ -594,7 +598,7 @@ public: /// \return The cost of Load and Store instructions. int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const; + unsigned AddressSpace, const Instruction *I = nullptr) const; /// \return The cost of masked Load and Store instructions. 
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, @@ -821,16 +825,17 @@ public: ArrayRef Args) = 0; virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) = 0; - virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0; + virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) = 0; virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) = 0; virtual int getCFInstrCost(unsigned Opcode) = 0; virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) = 0; + Type *CondTy, const Instruction *I) = 0; virtual int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) = 0; virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) = 0; + unsigned AddressSpace, const Instruction *I) = 0; virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) = 0; @@ -1065,8 +1070,9 @@ public: Type *SubTp) override { return Impl.getShuffleCost(Kind, Tp, Index, SubTp); } - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override { - return Impl.getCastInstrCost(Opcode, Dst, Src); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) override { + return Impl.getCastInstrCost(Opcode, Dst, Src, I); } int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) override { @@ -1075,15 +1081,16 @@ public: int getCFInstrCost(unsigned Opcode) override { return Impl.getCFInstrCost(Opcode); } - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override { - return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) override { + return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { return 
Impl.getVectorInstrCost(Opcode, Val, Index); } int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) override { - return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + unsigned AddressSpace, const Instruction *I) override { + return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); } int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index f3d4709..790acbc 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -334,7 +334,8 @@ public: return 1; } - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { return 1; } + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) { return 1; } unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) { @@ -343,7 +344,8 @@ public: unsigned getCFInstrCost(unsigned Opcode) { return 1; } - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) { return 1; } @@ -352,7 +354,7 @@ public: } unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, const Instruction *I) { return 1; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 67d8d59..e30e947 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -308,8 +308,7 @@ public: /// Estimate the overhead of scalarizing an instructions unique /// non-constant operands. The types of the arguments are ordinarily - /// scalar, in which case the costs are multiplied with VF. 
Vector - /// arguments are allowed if 1 is passed for VF. + /// scalar, in which case the costs are multiplied with VF. unsigned getOperandsScalarizationOverhead(ArrayRef Args, unsigned VF) { unsigned Cost = 0; @@ -318,8 +317,10 @@ public: if (!isa(A) && UniqueOperands.insert(A).second) { Type *VecTy = nullptr; if (A->getType()->isVectorTy()) { - assert (VF == 1 && "Vector argument passed with VF > 1"); VecTy = A->getType(); + // If A is a vector operand, VF should be 1 or correspond to A. + assert ((VF == 1 || VF == VecTy->getVectorNumElements()) && + "Vector argument does not match VF"); } else VecTy = VectorType::get(A->getType(), VF); @@ -331,6 +332,23 @@ public: return Cost; } + unsigned getScalarizationOverhead(Type *VecTy, ArrayRef Args) { + assert (VecTy->isVectorTy()); + + unsigned Cost = 0; + + Cost += getScalarizationOverhead(VecTy, true, false); + if (!Args.empty()) + Cost += getOperandsScalarizationOverhead(Args, + VecTy->getVectorNumElements()); + else + // When no information on arguments is provided, we add the cost + // associated with one argument as a heuristic. + Cost += getScalarizationOverhead(VecTy, false, true); + + return Cost; + } + unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } unsigned getArithmeticInstrCost( @@ -373,15 +391,7 @@ public: ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. - unsigned TotCost = getScalarizationOverhead(Ty, true, false) + Num * Cost; - if (!Args.empty()) - TotCost += getOperandsScalarizationOverhead(Args, Num); - else - // When no information on arguments is provided, we add the cost - // associated with one argument as a heuristic. - TotCost += getScalarizationOverhead(Ty, false, true); - - return TotCost; + return getScalarizationOverhead(Ty, Args) + Num * Cost; } // We don't know anything about this scalar instruction. 
@@ -397,7 +407,8 @@ public: return 1; } - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr) { const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -426,6 +437,18 @@ public: Dst->getPointerAddressSpace())) return 0; + // If this is a zext/sext of a load, return 0 if the corresponding + // extending load exists on target. + if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && + I && isa(I->getOperand(0))) { + EVT ExtVT = EVT::getEVT(Dst); + EVT LoadVT = EVT::getEVT(Src); + unsigned LType = + ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD); + if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) + return 0; + } + // If the cast is marked as legal (or promote) then assume low cost. if (SrcLT.first == DstLT.first && TLI->isOperationLegalOrPromote(ISD, DstLT.second)) @@ -483,14 +506,14 @@ public: Src->getVectorNumElements() / 2); T *TTI = static_cast(this); return TTI->getVectorSplitCost() + - (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc)); + (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); } // In other cases where the source or destination are illegal, assume // the operation will get scalarized. unsigned Num = Dst->getVectorNumElements(); unsigned Cost = static_cast(this)->getCastInstrCost( - Opcode, Dst->getScalarType(), Src->getScalarType()); + Opcode, Dst->getScalarType(), Src->getScalarType(), I); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. 
@@ -524,7 +547,8 @@ public: return 0; } - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) { const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -552,7 +576,7 @@ public: if (CondTy) CondTy = CondTy->getScalarType(); unsigned Cost = static_cast(this)->getCmpSelInstrCost( - Opcode, ValTy->getScalarType(), CondTy); + Opcode, ValTy->getScalarType(), CondTy, I); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. @@ -571,7 +595,7 @@ public: } unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, const Instruction *I = nullptr) { assert(!Src->isVoidTy() && "Invalid type"); std::pair LT = getTLI()->getTypeLegalizationCost(DL, Src); diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp index 757a1e5..32bfea5 100644 --- a/llvm/lib/Analysis/CostModel.cpp +++ b/llvm/lib/Analysis/CostModel.cpp @@ -447,25 +447,25 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { case Instruction::Select: { const SelectInst *SI = cast(I); Type *CondTy = SI->getCondition()->getType(); - return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); + return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); - return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); + return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I); } case Instruction::Store: { const StoreInst *SI = cast(I); Type *ValTy = SI->getValueOperand()->getType(); return TTI->getMemoryOpCost(I->getOpcode(), ValTy, - SI->getAlignment(), - SI->getPointerAddressSpace()); + SI->getAlignment(), + SI->getPointerAddressSpace(), I); } case 
Instruction::Load: { const LoadInst *LI = cast(I); return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), - LI->getAlignment(), - LI->getPointerAddressSpace()); + LI->getAlignment(), + LI->getPointerAddressSpace(), I); } case Instruction::ZExt: case Instruction::SExt: @@ -481,7 +481,7 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { case Instruction::BitCast: case Instruction::AddrSpaceCast: { Type *SrcTy = I->getOperand(0)->getType(); - return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); + return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I); } case Instruction::ExtractElement: { const ExtractElementInst * EEI = cast(I); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 0771eea..c8b8740 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -314,8 +314,10 @@ int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index, } int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src); + Type *Src, const Instruction *I) const { + assert ((I == nullptr || I->getOpcode() == Opcode) && + "Opcode should reflect passed instruction."); + int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -335,8 +337,10 @@ int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { } int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { - int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy); + Type *CondTy, const Instruction *I) const { + assert ((I == nullptr || I->getOpcode() == Opcode) && + "Opcode should reflect passed instruction."); + int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return 
Cost; } @@ -350,8 +354,11 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const { - int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + unsigned AddressSpace, + const Instruction *I) const { + assert ((I == nullptr || I->getOpcode() == Opcode) && + "Opcode should reflect passed instruction."); + int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 531cb97..4d59da0 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -176,7 +176,8 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) { return TTI::PSK_Software; } -int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { +int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -436,7 +437,7 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, } int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) { + Type *CondTy, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // We don't lower some vector selects well that are wider than the register @@ -463,11 +464,12 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return Entry->Cost; } } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, - unsigned Alignment, unsigned AddressSpace) { + unsigned Alignment, unsigned 
AddressSpace, + const Instruction *I) { auto LT = TLI->getTypeLegalizationCost(DL, Ty); if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store && diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index db01725..e37c003 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -86,7 +86,8 @@ public: unsigned getMaxInterleaveFactor(unsigned VF); - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr); int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index); @@ -103,10 +104,11 @@ public: int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr); - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I = nullptr); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + unsigned AddressSpace, const Instruction *I = nullptr); int getCostOfKeepingLiveOverCall(ArrayRef Tys); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index f2662a6..8eb9dbf 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -92,7 +92,8 @@ int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, } -int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { +int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -310,7 +311,8 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, return BaseT::getVectorInstrCost(Opcode, ValTy, Index); } -int 
ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { +int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a a vector select gets lowered to vbsl. @@ -335,7 +337,7 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { return LT.first; } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, @@ -504,7 +506,7 @@ int ARMTTIImpl::getArithmeticInstrCost( } int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, const Instruction *I) { std::pair LT = TLI->getTypeLegalizationCost(DL, Src); if (Src->isVectorTy() && Alignment != 16 && diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 5ee1f7c..7de0543 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -94,9 +94,11 @@ public: int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr); - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); @@ -114,7 +116,7 @@ public: ArrayRef Args = ArrayRef()); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + unsigned AddressSpace, const Instruction *I = nullptr); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, diff --git 
a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 2f1bcea..7ee1317 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -302,14 +302,16 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, return LT.first; } -int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { +int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); return BaseT::getCastInstrCost(Opcode, Dst, Src); } -int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); +int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) { + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { @@ -352,7 +354,7 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { } int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, const Instruction *I) { // Legalize the type. 
std::pair LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 30ee281..6ce70fb 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -74,11 +74,13 @@ public: TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, ArrayRef Args = ArrayRef()); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + unsigned AddressSpace, const Instruction *I = nullptr); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 140ee29..84d3c7b 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -347,9 +347,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // There should be no need to check for float types other than v2f64 // since <2 x f32> isn't a legal type. 
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); } // Handle floating-point types. diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index b10c0e0..e74c9a8 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -259,11 +259,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, } } if (isa(&I)) { - NumStores++; Type *MemAccessTy = I.getOperand(0)->getType(); - if((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) && - (getDataLayout().getTypeSizeInBits(MemAccessTy) == 128)) - NumStores++; // 128 bit fp/int stores get split. + NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0); } } @@ -313,3 +310,547 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) { return 0; } +int SystemZTTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, + TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, + ArrayRef Args) { + + // TODO: return a good value for BB-VECTORIZER that includes the + // immediate loads, which we do not want to count for the loop + // vectorizer, since they are hopefully hoisted out of the loop. This + // would require a new parameter 'InLoop', but not sure if constant + // args are common enough to motivate this. 
+ + unsigned ScalarBits = Ty->getScalarSizeInBits(); + + if (Ty->isVectorTy()) { + assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type."); + unsigned VF = Ty->getVectorNumElements(); + unsigned NumVectors = getNumberOfParts(Ty); + + // These vector operations are custom handled, but are still supported + // with one instruction per vector, regardless of element size. + if (Opcode == Instruction::Shl || Opcode == Instruction::LShr || + Opcode == Instruction::AShr) { + return NumVectors; + } + + // These FP operations are supported with a single vector instruction for + // double (base implementation assumes float generally costs 2). For + // FP128, the scalar cost is 1, and there is no overhead since the values + // are already in scalar registers. + if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub || + Opcode == Instruction::FMul || Opcode == Instruction::FDiv) { + switch (ScalarBits) { + case 32: { + // Return the cost of multiple scalar invocation plus the cost of + // inserting and extracting the values. + unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); + unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args); + // FIXME: VF 2 for these FP operations are currently just as + // expensive as for VF 4. + if (VF == 2) + Cost *= 2; + return Cost; + } + case 64: + case 128: + return NumVectors; + default: + break; + } + } + + // There is no native support for FRem. + if (Opcode == Instruction::FRem) { + unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(Ty, Args); + // FIXME: VF 2 for float is currently just as expensive as for VF 4. + if (VF == 2 && ScalarBits == 32) + Cost *= 2; + return Cost; + } + } + else { // Scalar: + // These FP operations are supported with a dedicated instruction for + // float, double and fp128 (base implementation assumes float generally + // costs 2). 
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub || + Opcode == Instruction::FMul || Opcode == Instruction::FDiv) + return 1; + + // There is no native support for FRem. + if (Opcode == Instruction::FRem) + return LIBCALL_COST; + + if (Opcode == Instruction::LShr || Opcode == Instruction::AShr) + return (ScalarBits >= 32 ? 1 : 2 /*ext*/); + + // Or requires one instruction, although it has custom handling for i64. + if (Opcode == Instruction::Or) + return 1; + + if (Opcode == Instruction::Xor && ScalarBits == 1) + // 2 * ipm sequences ; xor ; shift ; compare + return 7; + + // An extra extension for narrow types is needed. + if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem)) + // sext of op(s) for narrow types + return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1)); + + if (Opcode == Instruction::UDiv || Opcode == Instruction::URem) + // Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r + return (ScalarBits < 32 ? 4 : 2); + } + + // Fallback to the default implementation. + return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + Opd1PropInfo, Opd2PropInfo, Args); +} + + +int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) { + assert (Tp->isVectorTy()); + assert (ST->hasVector() && "getShuffleCost() called."); + unsigned NumVectors = getNumberOfParts(Tp); + + // TODO: Since fp32 is expanded, the shuffle cost should always be 0. + + // FP128 values are always in scalar registers, so there is no work + // involved with a shuffle, except for broadcast. In that case register + // moves are done with a single instruction per element. + if (Tp->getScalarType()->isFP128Ty()) + return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0); + + switch (Kind) { + case TargetTransformInfo::SK_ExtractSubvector: + // ExtractSubvector Index indicates start offset. + + // Extracting a subvector from first index is a noop. + return (Index == 0 ? 
0 : NumVectors); + + case TargetTransformInfo::SK_Broadcast: + // Loop vectorizer calls here to figure out the extra cost of + // broadcasting a loaded value to all elements of a vector. Since vlrep + // loads and replicates with a single instruction, adjust the returned + // value. + return NumVectors - 1; + + default: + + // SystemZ supports single instruction permutation / replication. + return NumVectors; + } + + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); +} + +// Return the log2 difference of the element sizes of the two vector types. +static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1) { + unsigned Bits0 = Ty0->getScalarSizeInBits(); + unsigned Bits1 = Ty1->getScalarSizeInBits(); + + if (Bits1 > Bits0) + return (Log2_32(Bits1) - Log2_32(Bits0)); + + return (Log2_32(Bits0) - Log2_32(Bits1)); +} + +// Return the number of instructions needed to truncate SrcTy to DstTy. +unsigned SystemZTTIImpl:: +getVectorTruncCost(Type *SrcTy, Type *DstTy) { + assert (SrcTy->isVectorTy() && DstTy->isVectorTy()); + assert (SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() && + "Packing must reduce size of vector type."); + assert (SrcTy->getVectorNumElements() == DstTy->getVectorNumElements() && + "Packing should not change number of elements."); + + // TODO: Since fp32 is expanded, the extract cost should always be 0. + + unsigned NumParts = getNumberOfParts(SrcTy); + if (NumParts <= 2) + // Up to 2 vector registers can be truncated efficiently with pack or + // permute. The latter requires an immediate mask to be loaded, which + // typically gets hoisted out of a loop. TODO: return a good value for + // BB-VECTORIZER that includes the immediate loads, which we do not want + // to count for the loop vectorizer. 
+    return 1;
+
+  // Wider sources: walk down one halving of the element width at a time.
+  // Each step halves the number of registers still involved (never going
+  // below one) and is charged one instruction per remaining register.
+  unsigned Cost = 0;
+  unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+  unsigned VF = SrcTy->getVectorNumElements();
+  for (unsigned P = 0; P < Log2Diff; ++P) {
+    if (NumParts > 1)
+      NumParts /= 2;
+    Cost += NumParts;
+  }
+
+  // Currently, a general mix of permutes and pack instructions is output by
+  // isel, which follows the cost computation above except for this case which
+  // is one instruction less:
+  if (VF == 8 && SrcTy->getScalarSizeInBits() == 64 &&
+      DstTy->getScalarSizeInBits() == 8)
+    Cost--;
+
+  return Cost;
+}
+
+// Return the cost of converting a vector bitmask produced by a compare
+// (SrcTy), to the type of the select or extend instruction (DstTy).
+// The result is 0 when both types have the same scalar width.
+unsigned SystemZTTIImpl::
+getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) {
+  assert (SrcTy->isVectorTy() && DstTy->isVectorTy() &&
+          "Should only be called with vector types.");
+
+  unsigned PackCost = 0;
+  unsigned SrcScalarBits = SrcTy->getScalarSizeInBits();
+  unsigned DstScalarBits = DstTy->getScalarSizeInBits();
+  unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+  if (SrcScalarBits > DstScalarBits)
+    // The bitmask will be truncated.
+    PackCost = getVectorTruncCost(SrcTy, DstTy);
+  else if (SrcScalarBits < DstScalarBits) {
+    unsigned DstNumParts = getNumberOfParts(DstTy);
+    // Each vector select needs its part of the bitmask unpacked.
+    PackCost = Log2Diff * DstNumParts;
+    // Extra cost for moving part of mask before unpacking.
+    PackCost += DstNumParts - 1;
+  }
+
+  return PackCost;
+}
+
+// Return the type of the compared operands. This is needed to compute the
+// cost for a Select / ZExt or SExt instruction.
+// Operand 0 of I is inspected: if it is a compare, the type of that
+// compare's first operand is used; if it is some other instruction whose
+// two first operands are both compares, the type compared by the first of
+// them is used. Returns nullptr when no compare is found. With VF == 1 the
+// (scalar) operand type is returned as is, otherwise a vector of VF elements
+// of its scalar type.
+static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
+  Type *OpTy = nullptr;
+  if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
+    OpTy = CI->getOperand(0)->getType();
+  else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0)))
+    if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
+      if (isa<CmpInst>(LogicI->getOperand(1)))
+        OpTy = CI0->getOperand(0)->getType();
+
+  if (OpTy != nullptr) {
+    if (VF == 1) {
+      assert (!OpTy->isVectorTy() && "Expected scalar type");
+      return OpTy;
+    }
+    // Return the potentially vectorized type based on 'I' and 'VF'.  'I' may
+    // be either scalar or already vectorized with a same or lesser VF.
+    Type *ElTy = OpTy->getScalarType();
+    return VectorType::get(ElTy, VF);
+  }
+
+  return nullptr;
+}
+
+// Return the cost of the cast Opcode from Src to Dst. Vector truncations,
+// integer extensions, int <-> fp conversions, FPTrunc and FPExt, as well as
+// scalar int -> fp conversions and scalar extensions of i1, are costed
+// here; everything else is delegated to BaseT. When I is non-null it is
+// used to find the type of the compared operands for extensions of i1.
+int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                     const Instruction *I) {
+  unsigned DstScalarBits = Dst->getScalarSizeInBits();
+  unsigned SrcScalarBits = Src->getScalarSizeInBits();
+
+  if (Src->isVectorTy()) {
+    assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
+    assert (Dst->isVectorTy());
+    unsigned VF = Src->getVectorNumElements();
+    unsigned NumDstVectors = getNumberOfParts(Dst);
+    unsigned NumSrcVectors = getNumberOfParts(Src);
+
+    if (Opcode == Instruction::Trunc) {
+      if (SrcScalarBits == DstScalarBits)
+        return 0; // Check for NOOP conversions.
+      return getVectorTruncCost(Src, Dst);
+    }
+
+    if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+      if (SrcScalarBits >= 8) {
+        // ZExt/SExt will be handled with one unpack per doubling of width.
+        unsigned NumUnpacks = getElSizeLog2Diff(Src, Dst);
+
+        // For types that span multiple vector registers, some additional
+        // instructions are used to set up the unpacking.
+        unsigned NumSrcVectorOps =
+          (NumUnpacks > 1 ?
(NumDstVectors - NumSrcVectors)
+                          : (NumDstVectors / 2));
+
+        return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
+      }
+      else if (SrcScalarBits == 1) {
+        // This should be extension of a compare i1 result.
+        // If we know the widths of the compared operands, get the
+        // cost of converting it to Dst. Otherwise assume same widths.
+        unsigned Cost = 0;
+        Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+        if (CmpOpTy != nullptr)
+          Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
+        if (Opcode == Instruction::ZExt)
+          // One 'vn' per dst vector with an immediate mask.
+          Cost += NumDstVectors;
+        return Cost;
+      }
+    }
+
+    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
+        Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
+      // TODO: Fix base implementation which could simplify things a bit here
+      // (seems to miss on differentiating on scalar/vector types).
+
+      // Only 64 bit vector conversions are natively supported.
+      if (SrcScalarBits == 64 && DstScalarBits == 64)
+        return NumDstVectors;
+
+      // Return the cost of multiple scalar invocation plus the cost of
+      // inserting and extracting the values. Base implementation does not
+      // realize float->int gets scalarized.
+      // The recursive call passes no instruction and scalar types, so it
+      // takes the scalar path of this function.
+      unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
+                                             Src->getScalarType());
+      unsigned TotCost = VF * ScalarCost;
+      bool NeedsInserts = true, NeedsExtracts = true;
+      // FP128 registers do not get inserted or extracted.
+      if (DstScalarBits == 128 &&
+          (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP))
+        NeedsInserts = false;
+      if (SrcScalarBits == 128 &&
+          (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
+        NeedsExtracts = false;
+
+      TotCost += getScalarizationOverhead(Dst, NeedsInserts, NeedsExtracts);
+
+      // FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
+      if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
+        TotCost *= 2;
+
+      return TotCost;
+    }
+
+    if (Opcode == Instruction::FPTrunc) {
+      if (SrcScalarBits == 128) // fp128 -> double/float + inserts of elements.
+        return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false);
+      else // double -> float
+        return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
+    }
+
+    if (Opcode == Instruction::FPExt) {
+      if (SrcScalarBits == 32 && DstScalarBits == 64) {
+        // float -> double is very rare and currently unoptimized. Instead of
+        // using vldeb, which can do two at a time, all conversions are
+        // scalarized.
+        return VF * 2;
+      }
+      // -> fp128.  VF * lxdb/lxeb + extraction of elements.
+      return VF + getScalarizationOverhead(Src, false, true);
+    }
+  }
+  else { // Scalar
+    assert (!Dst->isVectorTy());
+
+    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
+      return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
+
+    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+        Src->isIntegerTy(1)) {
+      // This should be extension of a compare i1 result, which is done with
+      // ipm and a varying sequence of instructions.
+      unsigned Cost = 0;
+      if (Opcode == Instruction::SExt)
+        Cost = (DstScalarBits < 64 ? 3 : 4);
+      if (Opcode == Instruction::ZExt)
+        Cost = 3;
+      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+      if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+        // If operands of an fp-type were compared, this costs +1.
+        Cost++;
+
+      return Cost;
+    }
+  }
+
+  // Anything not handled above is left to the default implementation.
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+}
+
+// Return the cost of a compare (ICmp / FCmp) or Select with opcode Opcode on
+// ValTy. When I is non-null it supplies the predicate for a vector compare,
+// or lets the feeding compare of a vector select be looked up. In the
+// visible code CondTy is only checked by an assert and forwarded to BaseT.
+int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                       const Instruction *I) {
+  if (ValTy->isVectorTy()) {
+    assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
+    assert (CondTy == nullptr || CondTy->isVectorTy());
+    unsigned VF = ValTy->getVectorNumElements();
+
+    // Called with a compare instruction.
+    if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
+      unsigned PredicateExtraCost = 0;
+      // The predicate is only known when the compare itself was passed in.
+      // dyn_cast_or_null<> covers both a null I and an I that is not a
+      // CmpInst, so the result is never dereferenced unchecked.
+      if (const CmpInst *CI = dyn_cast_or_null<CmpInst>(I)) {
+        // Some predicates cost one or two extra instructions.
+        switch (CI->getPredicate()) {
+        case CmpInst::Predicate::ICMP_NE:
+        case CmpInst::Predicate::ICMP_UGE:
+        case CmpInst::Predicate::ICMP_ULE:
+        case CmpInst::Predicate::ICMP_SGE:
+        case CmpInst::Predicate::ICMP_SLE:
+          PredicateExtraCost = 1;
+          break;
+        case CmpInst::Predicate::FCMP_ONE:
+        case CmpInst::Predicate::FCMP_ORD:
+        case CmpInst::Predicate::FCMP_UEQ:
+        case CmpInst::Predicate::FCMP_UNO:
+          PredicateExtraCost = 2;
+          break;
+        default:
+          break;
+        }
+      }
+
+      // Float is handled with 2*vmr[lh]f + 2*vldeb + vfchdb for each pair of
+      // floats.  FIXME: <2 x float> generates same code as <4 x float>.
+      unsigned CmpCostPerVector = (ValTy->getScalarType()->isFloatTy() ? 10 : 1);
+      unsigned NumVecs_cmp = getNumberOfParts(ValTy);
+
+      // The per-vector cost (base + predicate extra) is paid once per part.
+      unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost));
+      return Cost;
+    }
+    else { // Called with a select instruction.
+      assert (Opcode == Instruction::Select);
+
+      // We can figure out the extra cost of packing / unpacking if the
+      // instruction was passed and the compare instruction is found.
+      unsigned PackCost = 0;
+      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+      if (CmpOpTy != nullptr)
+        PackCost =
+          getVectorBitmaskConversionCost(CmpOpTy, ValTy);
+
+      return getNumberOfParts(ValTy) /*vsel*/ + PackCost;
+    }
+  }
+  else { // Scalar
+    switch (Opcode) {
+    case Instruction::ICmp: {
+      unsigned Cost = 1;
+      if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
+        Cost += 2; // extend both operands
+      return Cost;
+    }
+    case Instruction::Select:
+      if (ValTy->isFloatingPointTy())
+        return 4; // No load on condition for FP, so this costs a conditional jump.
+      return 1; // Load On Condition.
+    }
+  }
+
+  // Scalar opcodes not handled by the switch fall back to the default
+  // implementation (called without the instruction).
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+}
+
+// Return the cost of inserting into / extracting from element Index of the
+// vector type Val. Inserts of i64 elements and all extracts are costed
+// here; everything else is delegated to BaseT.
+int SystemZTTIImpl::
+getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+  // vlvgp will insert two grs into a vector register, so only count half the
+  // number of instructions.
+  if (Opcode == Instruction::InsertElement &&
+      Val->getScalarType()->isIntegerTy(64))
+    return ((Index % 2 == 0) ? 1 : 0);
+
+  if (Opcode == Instruction::ExtractElement) {
+    int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1);
+
+    // Give a slight penalty for moving out of vector pipeline to FXU unit.
+    if (Index == 0 && Val->getScalarType()->isIntegerTy())
+      Cost += 1;
+
+    return Cost;
+  }
+
+  return BaseT::getVectorInstrCost(Opcode, Val, Index);
+}
+
+// Return the cost of a load or store of type Src. A scalar load (passed as
+// I) with a single user is considered folded into that user, and thus free,
+// when the user is one of the integer operations listed below and the
+// loaded value is 32 or 64 bits wide. Otherwise the cost is the number of
+// parts of Src, doubled for 128-bit scalar sizes. Alignment and
+// AddressSpace are not consulted by the visible code.
+int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                    unsigned Alignment, unsigned AddressSpace,
+                                    const Instruction *I) {
+  assert(!Src->isVoidTy() && "Invalid type");
+
+  if (!Src->isVectorTy() && Opcode == Instruction::Load &&
+      I != nullptr && I->hasOneUse()) {
+    const Instruction *UserI = cast<Instruction>(*I->user_begin());
+    unsigned Bits = Src->getScalarSizeInBits();
+    bool FoldsLoad = false;
+    switch (UserI->getOpcode()) {
+    case Instruction::ICmp:
+    case Instruction::Add:
+    case Instruction::Sub:
+    case Instruction::Mul:
+    case Instruction::SDiv:
+    case Instruction::UDiv:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor:
+    // This also makes sense for float operations, but disabled for now due
+    // to regressions.
+    // case Instruction::FCmp:
+    // case Instruction::FAdd:
+    // case Instruction::FSub:
+    // case Instruction::FMul:
+    // case Instruction::FDiv:
+      FoldsLoad = (Bits == 32 || Bits == 64);
+      break;
+    }
+
+    if (FoldsLoad) {
+      assert (UserI->getNumOperands() == 2 &&
+              "Expected to only handle binops.");
+
+      // UserI can't fold two loads, so in that case return 0 cost only
+      // half of the time.
+      for (unsigned i = 0; i < 2; ++i) {
+        if (UserI->getOperand(i) == I)
+          continue;
+        if (LoadInst *LI = dyn_cast<LoadInst>(UserI->getOperand(i))) {
+          // The other operand is a single-use load as well: charge 1 when
+          // that load is operand 0 and 0 when it is operand 1, so that
+          // exactly one load of the pair is counted as folded.
+          if (LI->hasOneUse())
+            return i == 0;
+        }
+      }
+
+      return 0;
+    }
+  }
+
+  unsigned NumOps = getNumberOfParts(Src);
+
+  if (Src->getScalarSizeInBits() == 128)
+    // 128 bit scalars are held in a pair of two 64 bit registers.
+    NumOps *= 2;
+
+  return NumOps;
+}
+
+// Return the cost of an interleaved access group on the wide vector type
+// VecTy: the number of 128-bit parts needed to hold VecTy plus the permutes
+// needed per operand. For loads only the members listed in Indices are
+// counted as operands; for stores all Factor members are. Alignment and
+// AddressSpace are not consulted by the visible code.
+int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                               unsigned Factor,
+                                               ArrayRef<unsigned> Indices,
+                                               unsigned Alignment,
+                                               unsigned AddressSpace) {
+  assert(isa<VectorType>(VecTy) &&
+         "Expect a vector type for interleaved memory op");
+
+  // Vectors of pointers are sized as 64 bits per element.
+  unsigned WideBits = (VecTy->isPtrOrPtrVectorTy() ?
+     (64U * VecTy->getVectorNumElements()) : VecTy->getPrimitiveSizeInBits());
+  assert (WideBits > 0 && "Could not compute size of vector");
+  // Number of 128-bit parts, rounded up.
+  int NumWideParts =
+    ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+
+  // How many source vectors are handled to produce a vectorized operand?
+  int NumElsPerVector = (VecTy->getVectorNumElements() / NumWideParts);
+  int NumSrcParts =
+    ((NumWideParts > NumElsPerVector) ? NumElsPerVector : NumWideParts);
+
+  // A Load group may have gaps.
+  unsigned NumOperands =
+    ((Opcode == Instruction::Load) ? Indices.size() : Factor);
+
+  // Each needed permute takes two vectors as input.
+  if (NumSrcParts > 1)
+    NumSrcParts--;
+  int NumPermutes = NumSrcParts * NumOperands;
+
+  // Cost of load/store operations and the permutations needed.
+ return NumWideParts + NumPermutes; +} diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index f7d2d82..d2639cb 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -27,6 +27,8 @@ class SystemZTTIImpl : public BasicTTIImplBase { const SystemZSubtarget *getST() const { return ST; } const SystemZTargetLowering *getTLI() const { return TLI; } + unsigned const LIBCALL_COST = 30; + public: explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), @@ -53,6 +55,31 @@ public: unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); + bool enableInterleavedAccessVectorization() { return true; } + + int getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, + ArrayRef Args = ArrayRef()); + int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); + unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy); + unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I = nullptr); + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace, const Instruction *I = nullptr); + + int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef Indices, + unsigned Alignment, + unsigned AddressSpace); /// @} }; diff --git 
a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index ea8aa5c..b742fb4 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -938,7 +938,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } -int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { +int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -1304,7 +1305,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { return BaseT::getCastInstrCost(Opcode, Dst, Src); } -int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { +int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) { // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); @@ -1370,7 +1372,7 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) return LT.first * Entry->Cost; - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, @@ -1615,7 +1617,7 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { } int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, const Instruction *I) { // Handle non-power-of-two vectors such as <3 x float> if (VectorType *VTy = dyn_cast(Src)) { unsigned NumElem = VTy->getVectorNumElements(); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 2aa94fd..9bef9e8 100644 --- 
a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -62,11 +62,13 @@ public: TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, ArrayRef Args = ArrayRef()); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + unsigned AddressSpace, const Instruction *I = nullptr); int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace); int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 7223370..f344eb1 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1882,7 +1882,7 @@ chainToBasePointerCost(SmallVectorImpl &Chain, "non noop cast is found during rematerialization"); Type *SrcTy = CI->getOperand(0)->getType(); - Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy); + Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, CI); } else if (GetElementPtrInst *GEP = dyn_cast(Instr)) { // Cost of the address calculation diff --git a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp index 9e97d9a..c83b3f7 100644 --- a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp @@ -550,7 +550,8 @@ namespace { TargetTransformInfo::OperandValueKind 
Op1VK = TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OperandValueKind Op2VK = - TargetTransformInfo::OK_AnyValue) { + TargetTransformInfo::OK_AnyValue, + const Instruction *I = nullptr) { switch (Opcode) { default: break; case Instruction::GetElementPtr: @@ -584,7 +585,7 @@ namespace { case Instruction::Select: case Instruction::ICmp: case Instruction::FCmp: - return TTI->getCmpSelInstrCost(Opcode, T1, T2); + return TTI->getCmpSelInstrCost(Opcode, T1, T2, I); case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: @@ -598,7 +599,7 @@ namespace { case Instruction::FPTrunc: case Instruction::BitCast: case Instruction::ShuffleVector: - return TTI->getCastInstrCost(Opcode, T1, T2); + return TTI->getCastInstrCost(Opcode, T1, T2, I); } return 1; @@ -1044,14 +1045,14 @@ namespace { return false; } } else if (TTI) { - unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2); - unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2); - Type *VT1 = getVecTypeForPair(IT1, JT1), - *VT2 = getVecTypeForPair(IT2, JT2); TargetTransformInfo::OperandValueKind Op1VK = TargetTransformInfo::OK_AnyValue; TargetTransformInfo::OperandValueKind Op2VK = TargetTransformInfo::OK_AnyValue; + unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2, Op1VK, Op2VK, I); + unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2, Op1VK, Op2VK, J); + Type *VT1 = getVecTypeForPair(IT1, JT1), + *VT2 = getVecTypeForPair(IT2, JT2); // On some targets (example X86) the cost of a vector shift may vary // depending on whether the second operand is a Uniform or @@ -1090,7 +1091,7 @@ namespace { // but this cost is ignored (because insert and extract element // instructions are assigned a zero depth factor and are not really // fused in general). 
- unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2, Op1VK, Op2VK); + unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2, Op1VK, Op2VK, I); if (VCost > ICost + JCost) return false; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index fff1e29..f891cd9 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7048,7 +7048,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment, - AS); + AS, I); // Get the overhead of the extractelement and insertelement instructions // we might create due to scalarization. @@ -7078,7 +7078,7 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I, if (Legal->isMaskRequired(I)) Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); else - Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); + Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I); bool Reverse = ConsecutiveStride < 0; if (Reverse) @@ -7154,7 +7154,7 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, unsigned AS = getMemInstAlignment(I); return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS); + TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I); } return getWideningCost(I, VF); } @@ -7369,7 +7369,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (!ScalarCond) CondTy = VectorType::get(CondTy, VF); - return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); + return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, I); } case Instruction::ICmp: case Instruction::FCmp: { @@ -7378,7 +7378,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF)) 
ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]); VectorTy = ToVectorTy(ValTy, VF); - return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy); + return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, I); } case Instruction::Store: case Instruction::Load: { @@ -7403,7 +7403,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (isOptimizableIVTruncate(I, VF)) { auto *Trunc = cast(I); return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(), - Trunc->getSrcTy()); + Trunc->getSrcTy(), Trunc); } Type *SrcScalarTy = I->getOperand(0)->getType(); @@ -7427,7 +7427,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } } - return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); + return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I); } case Instruction::Call: { bool NeedToScalarize; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b9df89e..df7dc2a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1762,10 +1762,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { // Calculate the cost of this instruction. int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(), - VL0->getType(), SrcTy); + VL0->getType(), SrcTy, VL0); VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size()); - int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy); + int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy, VL0); return VecCost - ScalarCost; } case Instruction::FCmp: @@ -1774,8 +1774,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { // Calculate the cost of this instruction. 
VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size()); int ScalarCost = VecTy->getNumElements() * - TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty()); - int VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy); + TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty(), VL0); + int VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy, VL0); return VecCost - ScalarCost; } case Instruction::Add: @@ -1858,18 +1858,18 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { // Cost of wide load - cost of scalar loads. unsigned alignment = dyn_cast(VL0)->getAlignment(); int ScalarLdCost = VecTy->getNumElements() * - TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0); + TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0); int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, - VecTy, alignment, 0); + VecTy, alignment, 0, VL0); return VecLdCost - ScalarLdCost; } case Instruction::Store: { // We know that we can merge the stores. Calculate the cost. 
unsigned alignment = dyn_cast(VL0)->getAlignment(); int ScalarStCost = VecTy->getNumElements() * - TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0); + TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0); int VecStCost = TTI->getMemoryOpCost(Instruction::Store, - VecTy, alignment, 0); + VecTy, alignment, 0, VL0); return VecStCost - ScalarStCost; } case Instruction::Call: { diff --git a/llvm/test/Analysis/CostModel/SystemZ/cmp-ext.ll b/llvm/test/Analysis/CostModel/SystemZ/cmp-ext.ll new file mode 100644 index 0000000..e335878 --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/cmp-ext.ll @@ -0,0 +1,2403 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; + +define i8 @fun0(i8 %val1, i8 %val2) { + %cmp = icmp eq i8 %val1, %val2 + %v = sext i1 %cmp to i8 + ret i8 %v + +; CHECK: fun0 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext i1 %cmp to i8 +} + +define i16 @fun1(i8 %val1, i8 %val2) { + %cmp = icmp eq i8 %val1, %val2 + %v = sext i1 %cmp to i16 + ret i16 %v + +; CHECK: fun1 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext i1 %cmp to i16 +} + +define i32 @fun2(i8 %val1, i8 %val2) { + %cmp = icmp eq i8 %val1, %val2 + %v = sext i1 %cmp to i32 + ret i32 %v + +; CHECK: fun2 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext i1 %cmp to i32 +} + +define i64 @fun3(i8 %val1, i8 %val2) { + %cmp = icmp eq i8 %val1, %val2 + %v = sext i1 %cmp to i64 + ret i64 %v + +; CHECK: fun3 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sext i1 %cmp to i64 +} + +define i8 @fun4(i16 %val1, i16 %val2) { + %cmp = icmp eq i16 %val1, %val2 + %v = sext i1 %cmp to i8 + ret i8 %v + +; CHECK: fun4 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 
+; CHECK: cost of 3 for instruction: %v = sext i1 %cmp to i8 +} + +define i16 @fun5(i16 %val1, i16 %val2) { + %cmp = icmp eq i16 %val1, %val2 + %v = sext i1 %cmp to i16 + ret i16 %v + +; CHECK: fun5 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext i1 %cmp to i16 +} + +define i32 @fun6(i16 %val1, i16 %val2) { + %cmp = icmp eq i16 %val1, %val2 + %v = sext i1 %cmp to i32 + ret i32 %v + +; CHECK: fun6 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext i1 %cmp to i32 +} + +define i64 @fun7(i16 %val1, i16 %val2) { + %cmp = icmp eq i16 %val1, %val2 + %v = sext i1 %cmp to i64 + ret i64 %v + +; CHECK: fun7 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sext i1 %cmp to i64 +} + +define i8 @fun8(i32 %val1, i32 %val2) { + %cmp = icmp eq i32 %val1, %val2 + %v = sext i1 %cmp to i8 + ret i8 %v + +; CHECK: fun8 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext i1 %cmp to i8 +} + +define i16 @fun9(i32 %val1, i32 %val2) { + %cmp = icmp eq i32 %val1, %val2 + %v = sext i1 %cmp to i16 + ret i16 %v + +; CHECK: fun9 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext i1 %cmp to i16 +} + +define i32 @fun10(i32 %val1, i32 %val2) { + %cmp = icmp eq i32 %val1, %val2 + %v = sext i1 %cmp to i32 + ret i32 %v + +; CHECK: fun10 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext i1 %cmp to i32 +} + +define i64 @fun11(i32 %val1, i32 %val2) { + %cmp = icmp eq i32 %val1, %val2 + %v = sext i1 %cmp to i64 + ret i64 %v + +; CHECK: fun11 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sext i1 %cmp to i64 +} + +define i8 @fun12(i64 %val1, i64 %val2) { + %cmp = 
icmp eq i64 %val1, %val2 + %v = sext i1 %cmp to i8 + ret i8 %v + +; CHECK: fun12 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext i1 %cmp to i8 +} + +define i16 @fun13(i64 %val1, i64 %val2) { + %cmp = icmp eq i64 %val1, %val2 + %v = sext i1 %cmp to i16 + ret i16 %v + +; CHECK: fun13 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext i1 %cmp to i16 +} + +define i32 @fun14(i64 %val1, i64 %val2) { + %cmp = icmp eq i64 %val1, %val2 + %v = sext i1 %cmp to i32 + ret i32 %v + +; CHECK: fun14 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext i1 %cmp to i32 +} + +define i64 @fun15(i64 %val1, i64 %val2) { + %cmp = icmp eq i64 %val1, %val2 + %v = sext i1 %cmp to i64 + ret i64 %v + +; CHECK: fun15 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sext i1 %cmp to i64 +} + +define i8 @fun16(float %val1, float %val2) { + %cmp = fcmp ogt float %val1, %val2 + %v = sext i1 %cmp to i8 + ret i8 %v + +; CHECK: fun16 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sext i1 %cmp to i8 +} + +define i16 @fun17(float %val1, float %val2) { + %cmp = fcmp ogt float %val1, %val2 + %v = sext i1 %cmp to i16 + ret i16 %v + +; CHECK: fun17 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sext i1 %cmp to i16 +} + +define i32 @fun18(float %val1, float %val2) { + %cmp = fcmp ogt float %val1, %val2 + %v = sext i1 %cmp to i32 + ret i32 %v + +; CHECK: fun18 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sext i1 %cmp to i32 +} + +define i64 @fun19(float %val1, float %val2) { + %cmp = fcmp ogt float %val1, %val2 + %v = sext i1 %cmp to i64 + ret i64 %v + +; 
CHECK: fun19 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 5 for instruction: %v = sext i1 %cmp to i64 +} + +define i8 @fun20(double %val1, double %val2) { + %cmp = fcmp ogt double %val1, %val2 + %v = sext i1 %cmp to i8 + ret i8 %v + +; CHECK: fun20 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sext i1 %cmp to i8 +} + +define i16 @fun21(double %val1, double %val2) { + %cmp = fcmp ogt double %val1, %val2 + %v = sext i1 %cmp to i16 + ret i16 %v + +; CHECK: fun21 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sext i1 %cmp to i16 +} + +define i32 @fun22(double %val1, double %val2) { + %cmp = fcmp ogt double %val1, %val2 + %v = sext i1 %cmp to i32 + ret i32 %v + +; CHECK: fun22 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sext i1 %cmp to i32 +} + +define i64 @fun23(double %val1, double %val2) { + %cmp = fcmp ogt double %val1, %val2 + %v = sext i1 %cmp to i64 + ret i64 %v + +; CHECK: fun23 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 5 for instruction: %v = sext i1 %cmp to i64 +} + +define <2 x i8> @fun24(<2 x i8> %val1, <2 x i8> %val2) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i8> + ret <2 x i8> %v + +; CHECK: fun24 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <2 x i1> %cmp to <2 x i8> +} + +define <2 x i16> @fun25(<2 x i8> %val1, <2 x i8> %val2) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i16> + ret <2 x i16> %v + +; CHECK: fun25 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i16> +} + +define <2 x i32> @fun26(<2 x i8> %val1, <2 x i8> %val2) { + 
%cmp = icmp eq <2 x i8> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %v + +; CHECK: fun26 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = sext <2 x i1> %cmp to <2 x i32> +} + +define <2 x i64> @fun27(<2 x i8> %val1, <2 x i8> %val2) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %v + +; CHECK: fun27 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext <2 x i1> %cmp to <2 x i64> +} + +define <2 x i8> @fun28(<2 x i16> %val1, <2 x i16> %val2) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i8> + ret <2 x i8> %v + +; CHECK: fun28 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i8> +} + +define <2 x i16> @fun29(<2 x i16> %val1, <2 x i16> %val2) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i16> + ret <2 x i16> %v + +; CHECK: fun29 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <2 x i1> %cmp to <2 x i16> +} + +define <2 x i32> @fun30(<2 x i16> %val1, <2 x i16> %val2) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %v + +; CHECK: fun30 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i32> +} + +define <2 x i64> @fun31(<2 x i16> %val1, <2 x i16> %val2) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %v + +; CHECK: fun31 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = sext <2 x i1> %cmp to <2 x i64> +} + +define <2 x i8> @fun32(<2 x i32> %val1, <2 x i32> %val2) { + %cmp = icmp eq <2 x i32> 
%val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i8> + ret <2 x i8> %v + +; CHECK: fun32 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i8> +} + +define <2 x i16> @fun33(<2 x i32> %val1, <2 x i32> %val2) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i16> + ret <2 x i16> %v + +; CHECK: fun33 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i16> +} + +define <2 x i32> @fun34(<2 x i32> %val1, <2 x i32> %val2) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %v + +; CHECK: fun34 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <2 x i1> %cmp to <2 x i32> +} + +define <2 x i64> @fun35(<2 x i32> %val1, <2 x i32> %val2) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %v + +; CHECK: fun35 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i64> +} + +define <2 x i8> @fun36(<2 x i64> %val1, <2 x i64> %val2) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i8> + ret <2 x i8> %v + +; CHECK: fun36 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i8> +} + +define <2 x i16> @fun37(<2 x i64> %val1, <2 x i64> %val2) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i16> + ret <2 x i16> %v + +; CHECK: fun37 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i16> +} + +define <2 x i32> @fun38(<2 x i64> %val1, <2 x i64> %val2) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %v = 
sext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %v + +; CHECK: fun38 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i32> +} + +define <2 x i64> @fun39(<2 x i64> %val1, <2 x i64> %val2) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %v + +; CHECK: fun39 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <2 x i1> %cmp to <2 x i64> +} + +define <2 x i8> @fun40(<2 x float> %val1, <2 x float> %val2) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i8> + ret <2 x i8> %v + +; CHECK: fun40 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i8> +} + +define <2 x i16> @fun41(<2 x float> %val1, <2 x float> %val2) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i16> + ret <2 x i16> %v + +; CHECK: fun41 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i16> +} + +define <2 x i32> @fun42(<2 x float> %val1, <2 x float> %val2) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %v + +; CHECK: fun42 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <2 x i1> %cmp to <2 x i32> +} + +define <2 x i64> @fun43(<2 x float> %val1, <2 x float> %val2) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %v + +; CHECK: fun43 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i64> +} + +define <2 x i8> @fun44(<2 x double> %val1, <2 x double> %val2) { + %cmp = fcmp 
ogt <2 x double> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i8> + ret <2 x i8> %v + +; CHECK: fun44 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i8> +} + +define <2 x i16> @fun45(<2 x double> %val1, <2 x double> %val2) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i16> + ret <2 x i16> %v + +; CHECK: fun45 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i16> +} + +define <2 x i32> @fun46(<2 x double> %val1, <2 x double> %val2) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %v + +; CHECK: fun46 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <2 x i1> %cmp to <2 x i32> +} + +define <2 x i64> @fun47(<2 x double> %val1, <2 x double> %val2) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %v = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %v + +; CHECK: fun47 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <2 x i1> %cmp to <2 x i64> +} + +define <4 x i8> @fun48(<4 x i8> %val1, <4 x i8> %val2) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %v + +; CHECK: fun48 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <4 x i1> %cmp to <4 x i8> +} + +define <4 x i16> @fun49(<4 x i8> %val1, <4 x i8> %val2) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %v + +; CHECK: fun49 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i16> +} + +define <4 x i32> @fun50(<4 x i8> %val1, <4 x i8> 
%val2) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %v + +; CHECK: fun50 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = sext <4 x i1> %cmp to <4 x i32> +} + +define <4 x i64> @fun51(<4 x i8> %val1, <4 x i8> %val2) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %v + +; CHECK: fun51 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 7 for instruction: %v = sext <4 x i1> %cmp to <4 x i64> +} + +define <4 x i8> @fun52(<4 x i16> %val1, <4 x i16> %val2) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %v + +; CHECK: fun52 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i8> +} + +define <4 x i16> @fun53(<4 x i16> %val1, <4 x i16> %val2) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %v + +; CHECK: fun53 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <4 x i1> %cmp to <4 x i16> +} + +define <4 x i32> @fun54(<4 x i16> %val1, <4 x i16> %val2) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %v + +; CHECK: fun54 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i32> +} + +define <4 x i64> @fun55(<4 x i16> %val1, <4 x i16> %val2) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %v + +; CHECK: fun55 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 5 for instruction: %v = sext <4 x i1> %cmp to <4 x i64> +} + +define <4 x i8> @fun56(<4 x i32> %val1, <4 x i32> %val2) { + %cmp = icmp 
eq <4 x i32> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %v + +; CHECK: fun56 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i8> +} + +define <4 x i16> @fun57(<4 x i32> %val1, <4 x i32> %val2) { + %cmp = icmp eq <4 x i32> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %v + +; CHECK: fun57 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i16> +} + +define <4 x i32> @fun58(<4 x i32> %val1, <4 x i32> %val2) { + %cmp = icmp eq <4 x i32> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %v + +; CHECK: fun58 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <4 x i1> %cmp to <4 x i32> +} + +define <4 x i64> @fun59(<4 x i32> %val1, <4 x i32> %val2) { + %cmp = icmp eq <4 x i32> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %v + +; CHECK: fun59 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext <4 x i1> %cmp to <4 x i64> +} + +define <4 x i8> @fun60(<4 x i64> %val1, <4 x i64> %val2) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %v + +; CHECK: fun60 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i8> +} + +define <4 x i16> @fun61(<4 x i64> %val1, <4 x i64> %val2) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %v + +; CHECK: fun61 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i16> +} + +define <4 x i32> @fun62(<4 x i64> %val1, <4 x i64> %val2) { + %cmp = icmp eq <4 x i64> %val1, 
%val2 + %v = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %v + +; CHECK: fun62 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i32> +} + +define <4 x i64> @fun63(<4 x i64> %val1, <4 x i64> %val2) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %v + +; CHECK: fun63 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <4 x i1> %cmp to <4 x i64> +} + +define <4 x i8> @fun64(<4 x float> %val1, <4 x float> %val2) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %v + +; CHECK: fun64 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i8> +} + +define <4 x i16> @fun65(<4 x float> %val1, <4 x float> %val2) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %v + +; CHECK: fun65 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i16> +} + +define <4 x i32> @fun66(<4 x float> %val1, <4 x float> %val2) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %v + +; CHECK: fun66 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <4 x i1> %cmp to <4 x i32> +} + +define <4 x i64> @fun67(<4 x float> %val1, <4 x float> %val2) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %v + +; CHECK: fun67 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext <4 x i1> %cmp to <4 x i64> +} + +define <4 x i8> @fun68(<4 x double> %val1, <4 x double> %val2) { + 
%cmp = fcmp ogt <4 x double> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %v + +; CHECK: fun68 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i8> +} + +define <4 x i16> @fun69(<4 x double> %val1, <4 x double> %val2) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %v + +; CHECK: fun69 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i16> +} + +define <4 x i32> @fun70(<4 x double> %val1, <4 x double> %val2) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %v + +; CHECK: fun70 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <4 x i1> %cmp to <4 x i32> +} + +define <4 x i64> @fun71(<4 x double> %val1, <4 x double> %val2) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %v = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %v + +; CHECK: fun71 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <4 x i1> %cmp to <4 x i64> +} + +define <8 x i8> @fun72(<8 x i8> %val1, <8 x i8> %val2) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i8> + ret <8 x i8> %v + +; CHECK: fun72 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <8 x i1> %cmp to <8 x i8> +} + +define <8 x i16> @fun73(<8 x i8> %val1, <8 x i8> %val2) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %v + +; CHECK: fun73 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <8 x i1> %cmp to <8 x i16> +} + +define <8 x i32> @fun74(<8 x i8> %val1, 
<8 x i8> %val2) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %v + +; CHECK: fun74 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 5 for instruction: %v = sext <8 x i1> %cmp to <8 x i32> +} + +define <8 x i64> @fun75(<8 x i8> %val1, <8 x i8> %val2) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %v + +; CHECK: fun75 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 15 for instruction: %v = sext <8 x i1> %cmp to <8 x i64> +} + +define <8 x i8> @fun76(<8 x i16> %val1, <8 x i16> %val2) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i8> + ret <8 x i8> %v + +; CHECK: fun76 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <8 x i1> %cmp to <8 x i8> +} + +define <8 x i16> @fun77(<8 x i16> %val1, <8 x i16> %val2) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %v + +; CHECK: fun77 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <8 x i1> %cmp to <8 x i16> +} + +define <8 x i32> @fun78(<8 x i16> %val1, <8 x i16> %val2) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %v + +; CHECK: fun78 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext <8 x i1> %cmp to <8 x i32> +} + +define <8 x i64> @fun79(<8 x i16> %val1, <8 x i16> %val2) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %v + +; CHECK: fun79 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 11 for instruction: %v = sext <8 x i1> %cmp to <8 x i64> +} + +define <8 x i8> @fun80(<8 x i32> %val1, <8 x i32> %val2) { + 
%cmp = icmp eq <8 x i32> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i8> + ret <8 x i8> %v + +; CHECK: fun80 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <8 x i1> %cmp to <8 x i8> +} + +define <8 x i16> @fun81(<8 x i32> %val1, <8 x i32> %val2) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %v + +; CHECK: fun81 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <8 x i1> %cmp to <8 x i16> +} + +define <8 x i32> @fun82(<8 x i32> %val1, <8 x i32> %val2) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %v + +; CHECK: fun82 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <8 x i1> %cmp to <8 x i32> +} + +define <8 x i64> @fun83(<8 x i32> %val1, <8 x i32> %val2) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %v + +; CHECK: fun83 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 7 for instruction: %v = sext <8 x i1> %cmp to <8 x i64> +} + +define <8 x i8> @fun84(<8 x i64> %val1, <8 x i64> %val2) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i8> + ret <8 x i8> %v + +; CHECK: fun84 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext <8 x i1> %cmp to <8 x i8> +} + +define <8 x i16> @fun85(<8 x i64> %val1, <8 x i64> %val2) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %v + +; CHECK: fun85 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext <8 x i1> %cmp to <8 x i16> +} + +define <8 x i32> @fun86(<8 x i64> %val1, <8 x i64> %val2) { + %cmp = icmp eq <8 x 
i64> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %v + +; CHECK: fun86 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = sext <8 x i1> %cmp to <8 x i32> +} + +define <8 x i64> @fun87(<8 x i64> %val1, <8 x i64> %val2) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %v + +; CHECK: fun87 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <8 x i1> %cmp to <8 x i64> +} + +define <8 x i8> @fun88(<8 x float> %val1, <8 x float> %val2) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i8> + ret <8 x i8> %v + +; CHECK: fun88 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <8 x i1> %cmp to <8 x i8> +} + +define <8 x i16> @fun89(<8 x float> %val1, <8 x float> %val2) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %v + +; CHECK: fun89 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <8 x i1> %cmp to <8 x i16> +} + +define <8 x i32> @fun90(<8 x float> %val1, <8 x float> %val2) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %v + +; CHECK: fun90 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <8 x i1> %cmp to <8 x i32> +} + +define <8 x i64> @fun91(<8 x float> %val1, <8 x float> %val2) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %v + +; CHECK: fun91 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 7 for instruction: %v = sext <8 x i1> %cmp to <8 x i64> +} + +define <8 x i8> @fun92(<8 x double> %val1, <8 x double> 
%val2) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i8> + ret <8 x i8> %v + +; CHECK: fun92 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext <8 x i1> %cmp to <8 x i8> +} + +define <8 x i16> @fun93(<8 x double> %val1, <8 x double> %val2) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %v + +; CHECK: fun93 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext <8 x i1> %cmp to <8 x i16> +} + +define <8 x i32> @fun94(<8 x double> %val1, <8 x double> %val2) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %v + +; CHECK: fun94 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = sext <8 x i1> %cmp to <8 x i32> +} + +define <8 x i64> @fun95(<8 x double> %val1, <8 x double> %val2) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %v = sext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %v + +; CHECK: fun95 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <8 x i1> %cmp to <8 x i64> +} + +define <16 x i8> @fun96(<16 x i8> %val1, <16 x i8> %val2) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %v + +; CHECK: fun96 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <16 x i1> %cmp to <16 x i8> +} + +define <16 x i16> @fun97(<16 x i8> %val1, <16 x i8> %val2) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i16> + ret <16 x i16> %v + +; CHECK: fun97 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext <16 x i1> %cmp to <16 x i16> +} + +define 
<16 x i32> @fun98(<16 x i8> %val1, <16 x i8> %val2) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i32> + ret <16 x i32> %v + +; CHECK: fun98 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 11 for instruction: %v = sext <16 x i1> %cmp to <16 x i32> +} + +define <16 x i64> @fun99(<16 x i8> %val1, <16 x i8> %val2) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %v + +; CHECK: fun99 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 31 for instruction: %v = sext <16 x i1> %cmp to <16 x i64> +} + +define <16 x i8> @fun100(<16 x i16> %val1, <16 x i16> %val2) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %v + +; CHECK: fun100 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sext <16 x i1> %cmp to <16 x i8> +} + +define <16 x i16> @fun101(<16 x i16> %val1, <16 x i16> %val2) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i16> + ret <16 x i16> %v + +; CHECK: fun101 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <16 x i1> %cmp to <16 x i16> +} + +define <16 x i32> @fun102(<16 x i16> %val1, <16 x i16> %val2) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i32> + ret <16 x i32> %v + +; CHECK: fun102 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 7 for instruction: %v = sext <16 x i1> %cmp to <16 x i32> +} + +define <16 x i64> @fun103(<16 x i16> %val1, <16 x i16> %val2) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %v + +; CHECK: fun103 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 23 for instruction: %v = 
sext <16 x i1> %cmp to <16 x i64> +} + +define <16 x i8> @fun104(<16 x i32> %val1, <16 x i32> %val2) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %v + +; CHECK: fun104 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext <16 x i1> %cmp to <16 x i8> +} + +define <16 x i16> @fun105(<16 x i32> %val1, <16 x i32> %val2) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i16> + ret <16 x i16> %v + +; CHECK: fun105 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = sext <16 x i1> %cmp to <16 x i16> +} + +define <16 x i32> @fun106(<16 x i32> %val1, <16 x i32> %val2) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i32> + ret <16 x i32> %v + +; CHECK: fun106 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <16 x i1> %cmp to <16 x i32> +} + +define <16 x i64> @fun107(<16 x i32> %val1, <16 x i32> %val2) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %v + +; CHECK: fun107 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 15 for instruction: %v = sext <16 x i1> %cmp to <16 x i64> +} + +define <16 x i8> @fun108(<16 x i64> %val1, <16 x i64> %val2) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %v + +; CHECK: fun108 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 7 for instruction: %v = sext <16 x i1> %cmp to <16 x i8> +} + +define <16 x i16> @fun109(<16 x i64> %val1, <16 x i64> %val2) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i16> + ret <16 x i16> %v + +; CHECK: fun109 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> 
%val1, %val2 +; CHECK: cost of 6 for instruction: %v = sext <16 x i1> %cmp to <16 x i16> +} + +define <16 x i32> @fun110(<16 x i64> %val1, <16 x i64> %val2) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i32> + ret <16 x i32> %v + +; CHECK: fun110 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sext <16 x i1> %cmp to <16 x i32> +} + +define <16 x i64> @fun111(<16 x i64> %val1, <16 x i64> %val2) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %v + +; CHECK: fun111 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <16 x i1> %cmp to <16 x i64> +} + +define <16 x i8> @fun112(<16 x float> %val1, <16 x float> %val2) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %v + +; CHECK: fun112 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = sext <16 x i1> %cmp to <16 x i8> +} + +define <16 x i16> @fun113(<16 x float> %val1, <16 x float> %val2) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i16> + ret <16 x i16> %v + +; CHECK: fun113 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = sext <16 x i1> %cmp to <16 x i16> +} + +define <16 x i32> @fun114(<16 x float> %val1, <16 x float> %val2) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i32> + ret <16 x i32> %v + +; CHECK: fun114 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <16 x i1> %cmp to <16 x i32> +} + +define <16 x i64> @fun115(<16 x float> %val1, <16 x float> %val2) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i64> + ret 
<16 x i64> %v + +; CHECK: fun115 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 15 for instruction: %v = sext <16 x i1> %cmp to <16 x i64> +} + +define <16 x i8> @fun116(<16 x double> %val1, <16 x double> %val2) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %v + +; CHECK: fun116 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 7 for instruction: %v = sext <16 x i1> %cmp to <16 x i8> +} + +define <16 x i16> @fun117(<16 x double> %val1, <16 x double> %val2) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i16> + ret <16 x i16> %v + +; CHECK: fun117 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 6 for instruction: %v = sext <16 x i1> %cmp to <16 x i16> +} + +define <16 x i32> @fun118(<16 x double> %val1, <16 x double> %val2) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i32> + ret <16 x i32> %v + +; CHECK: fun118 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sext <16 x i1> %cmp to <16 x i32> +} + +define <16 x i64> @fun119(<16 x double> %val1, <16 x double> %val2) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %v = sext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %v + +; CHECK: fun119 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 0 for instruction: %v = sext <16 x i1> %cmp to <16 x i64> +} + +define i8 @fun120(i8 %val1, i8 %val2) { + %cmp = icmp eq i8 %val1, %val2 + %v = zext i1 %cmp to i8 + ret i8 %v + +; CHECK: fun120 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i8 +} + +define i16 @fun121(i8 %val1, i8 %val2) { + %cmp = icmp eq i8 %val1, %val2 + %v = zext i1 %cmp to i16 + ret i16 %v 
+ +; CHECK: fun121 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i16 +} + +define i32 @fun122(i8 %val1, i8 %val2) { + %cmp = icmp eq i8 %val1, %val2 + %v = zext i1 %cmp to i32 + ret i32 %v + +; CHECK: fun122 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i32 +} + +define i64 @fun123(i8 %val1, i8 %val2) { + %cmp = icmp eq i8 %val1, %val2 + %v = zext i1 %cmp to i64 + ret i64 %v + +; CHECK: fun123 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i64 +} + +define i8 @fun124(i16 %val1, i16 %val2) { + %cmp = icmp eq i16 %val1, %val2 + %v = zext i1 %cmp to i8 + ret i8 %v + +; CHECK: fun124 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i8 +} + +define i16 @fun125(i16 %val1, i16 %val2) { + %cmp = icmp eq i16 %val1, %val2 + %v = zext i1 %cmp to i16 + ret i16 %v + +; CHECK: fun125 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i16 +} + +define i32 @fun126(i16 %val1, i16 %val2) { + %cmp = icmp eq i16 %val1, %val2 + %v = zext i1 %cmp to i32 + ret i32 %v + +; CHECK: fun126 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i32 +} + +define i64 @fun127(i16 %val1, i16 %val2) { + %cmp = icmp eq i16 %val1, %val2 + %v = zext i1 %cmp to i64 + ret i64 %v + +; CHECK: fun127 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i64 +} + +define i8 @fun128(i32 %val1, i32 %val2) { + %cmp = icmp eq i32 %val1, %val2 + %v = zext i1 %cmp to i8 + ret i8 %v + +; CHECK: fun128 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost 
of 3 for instruction: %v = zext i1 %cmp to i8 +} + +define i16 @fun129(i32 %val1, i32 %val2) { + %cmp = icmp eq i32 %val1, %val2 + %v = zext i1 %cmp to i16 + ret i16 %v + +; CHECK: fun129 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i16 +} + +define i32 @fun130(i32 %val1, i32 %val2) { + %cmp = icmp eq i32 %val1, %val2 + %v = zext i1 %cmp to i32 + ret i32 %v + +; CHECK: fun130 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i32 +} + +define i64 @fun131(i32 %val1, i32 %val2) { + %cmp = icmp eq i32 %val1, %val2 + %v = zext i1 %cmp to i64 + ret i64 %v + +; CHECK: fun131 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i64 +} + +define i8 @fun132(i64 %val1, i64 %val2) { + %cmp = icmp eq i64 %val1, %val2 + %v = zext i1 %cmp to i8 + ret i8 %v + +; CHECK: fun132 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i8 +} + +define i16 @fun133(i64 %val1, i64 %val2) { + %cmp = icmp eq i64 %val1, %val2 + %v = zext i1 %cmp to i16 + ret i16 %v + +; CHECK: fun133 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i16 +} + +define i32 @fun134(i64 %val1, i64 %val2) { + %cmp = icmp eq i64 %val1, %val2 + %v = zext i1 %cmp to i32 + ret i32 %v + +; CHECK: fun134 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i32 +} + +define i64 @fun135(i64 %val1, i64 %val2) { + %cmp = icmp eq i64 %val1, %val2 + %v = zext i1 %cmp to i64 + ret i64 %v + +; CHECK: fun135 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext i1 %cmp to i64 +} + +define i8 @fun136(float %val1, float 
%val2) { + %cmp = fcmp ogt float %val1, %val2 + %v = zext i1 %cmp to i8 + ret i8 %v + +; CHECK: fun136 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext i1 %cmp to i8 +} + +define i16 @fun137(float %val1, float %val2) { + %cmp = fcmp ogt float %val1, %val2 + %v = zext i1 %cmp to i16 + ret i16 %v + +; CHECK: fun137 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext i1 %cmp to i16 +} + +define i32 @fun138(float %val1, float %val2) { + %cmp = fcmp ogt float %val1, %val2 + %v = zext i1 %cmp to i32 + ret i32 %v + +; CHECK: fun138 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext i1 %cmp to i32 +} + +define i64 @fun139(float %val1, float %val2) { + %cmp = fcmp ogt float %val1, %val2 + %v = zext i1 %cmp to i64 + ret i64 %v + +; CHECK: fun139 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext i1 %cmp to i64 +} + +define i8 @fun140(double %val1, double %val2) { + %cmp = fcmp ogt double %val1, %val2 + %v = zext i1 %cmp to i8 + ret i8 %v + +; CHECK: fun140 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext i1 %cmp to i8 +} + +define i16 @fun141(double %val1, double %val2) { + %cmp = fcmp ogt double %val1, %val2 + %v = zext i1 %cmp to i16 + ret i16 %v + +; CHECK: fun141 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext i1 %cmp to i16 +} + +define i32 @fun142(double %val1, double %val2) { + %cmp = fcmp ogt double %val1, %val2 + %v = zext i1 %cmp to i32 + ret i32 %v + +; CHECK: fun142 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext i1 %cmp to i32 +} + +define i64 @fun143(double %val1, double %val2) { + 
%cmp = fcmp ogt double %val1, %val2 + %v = zext i1 %cmp to i64 + ret i64 %v + +; CHECK: fun143 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext i1 %cmp to i64 +} + +define <2 x i8> @fun144(<2 x i8> %val1, <2 x i8> %val2) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i8> + ret <2 x i8> %v + +; CHECK: fun144 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <2 x i1> %cmp to <2 x i8> +} + +define <2 x i16> @fun145(<2 x i8> %val1, <2 x i8> %val2) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i16> + ret <2 x i16> %v + +; CHECK: fun145 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i16> +} + +define <2 x i32> @fun146(<2 x i8> %val1, <2 x i8> %val2) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %v + +; CHECK: fun146 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext <2 x i1> %cmp to <2 x i32> +} + +define <2 x i64> @fun147(<2 x i8> %val1, <2 x i8> %val2) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %v + +; CHECK: fun147 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <2 x i1> %cmp to <2 x i64> +} + +define <2 x i8> @fun148(<2 x i16> %val1, <2 x i16> %val2) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i8> + ret <2 x i8> %v + +; CHECK: fun148 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i8> +} + +define <2 x i16> @fun149(<2 x i16> %val1, <2 x i16> %val2) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %v = zext <2 x i1> 
%cmp to <2 x i16> + ret <2 x i16> %v + +; CHECK: fun149 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <2 x i1> %cmp to <2 x i16> +} + +define <2 x i32> @fun150(<2 x i16> %val1, <2 x i16> %val2) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %v + +; CHECK: fun150 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i32> +} + +define <2 x i64> @fun151(<2 x i16> %val1, <2 x i16> %val2) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %v + +; CHECK: fun151 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext <2 x i1> %cmp to <2 x i64> +} + +define <2 x i8> @fun152(<2 x i32> %val1, <2 x i32> %val2) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i8> + ret <2 x i8> %v + +; CHECK: fun152 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i8> +} + +define <2 x i16> @fun153(<2 x i32> %val1, <2 x i32> %val2) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i16> + ret <2 x i16> %v + +; CHECK: fun153 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i16> +} + +define <2 x i32> @fun154(<2 x i32> %val1, <2 x i32> %val2) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %v + +; CHECK: fun154 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <2 x i1> %cmp to <2 x i32> +} + +define <2 x i64> @fun155(<2 x i32> %val1, <2 x i32> %val2) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %v = zext <2 x i1> %cmp 
to <2 x i64> + ret <2 x i64> %v + +; CHECK: fun155 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i64> +} + +define <2 x i8> @fun156(<2 x i64> %val1, <2 x i64> %val2) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i8> + ret <2 x i8> %v + +; CHECK: fun156 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i8> +} + +define <2 x i16> @fun157(<2 x i64> %val1, <2 x i64> %val2) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i16> + ret <2 x i16> %v + +; CHECK: fun157 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i16> +} + +define <2 x i32> @fun158(<2 x i64> %val1, <2 x i64> %val2) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %v + +; CHECK: fun158 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i32> +} + +define <2 x i64> @fun159(<2 x i64> %val1, <2 x i64> %val2) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %v + +; CHECK: fun159 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <2 x i1> %cmp to <2 x i64> +} + +define <2 x i8> @fun160(<2 x float> %val1, <2 x float> %val2) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i8> + ret <2 x i8> %v + +; CHECK: fun160 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i8> +} + +define <2 x i16> @fun161(<2 x float> %val1, <2 x float> %val2) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %v = zext <2 x 
i1> %cmp to <2 x i16> + ret <2 x i16> %v + +; CHECK: fun161 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i16> +} + +define <2 x i32> @fun162(<2 x float> %val1, <2 x float> %val2) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %v + +; CHECK: fun162 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <2 x i1> %cmp to <2 x i32> +} + +define <2 x i64> @fun163(<2 x float> %val1, <2 x float> %val2) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %v + +; CHECK: fun163 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i64> +} + +define <2 x i8> @fun164(<2 x double> %val1, <2 x double> %val2) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i8> + ret <2 x i8> %v + +; CHECK: fun164 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i8> +} + +define <2 x i16> @fun165(<2 x double> %val1, <2 x double> %val2) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i16> + ret <2 x i16> %v + +; CHECK: fun165 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i16> +} + +define <2 x i32> @fun166(<2 x double> %val1, <2 x double> %val2) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i32> + ret <2 x i32> %v + +; CHECK: fun166 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <2 x i1> %cmp to <2 x i32> +} + +define <2 x i64> @fun167(<2 x double> %val1, <2 x 
double> %val2) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %v = zext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %v + +; CHECK: fun167 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <2 x i1> %cmp to <2 x i64> +} + +define <4 x i8> @fun168(<4 x i8> %val1, <4 x i8> %val2) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %v + +; CHECK: fun168 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <4 x i1> %cmp to <4 x i8> +} + +define <4 x i16> @fun169(<4 x i8> %val1, <4 x i8> %val2) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %v + +; CHECK: fun169 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i16> +} + +define <4 x i32> @fun170(<4 x i8> %val1, <4 x i8> %val2) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %v + +; CHECK: fun170 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 3 for instruction: %v = zext <4 x i1> %cmp to <4 x i32> +} + +define <4 x i64> @fun171(<4 x i8> %val1, <4 x i8> %val2) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %v + +; CHECK: fun171 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 9 for instruction: %v = zext <4 x i1> %cmp to <4 x i64> +} + +define <4 x i8> @fun172(<4 x i16> %val1, <4 x i16> %val2) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %v + +; CHECK: fun172 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i8> +} + +define <4 x i16> @fun173(<4 x i16> %val1, <4 x i16> %val2) { 
+ %cmp = icmp eq <4 x i16> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %v + +; CHECK: fun173 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <4 x i1> %cmp to <4 x i16> +} + +define <4 x i32> @fun174(<4 x i16> %val1, <4 x i16> %val2) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %v + +; CHECK: fun174 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i32> +} + +define <4 x i64> @fun175(<4 x i16> %val1, <4 x i16> %val2) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %v + +; CHECK: fun175 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 7 for instruction: %v = zext <4 x i1> %cmp to <4 x i64> +} + +define <4 x i8> @fun176(<4 x i32> %val1, <4 x i32> %val2) { + %cmp = icmp eq <4 x i32> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %v + +; CHECK: fun176 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i8> +} + +define <4 x i16> @fun177(<4 x i32> %val1, <4 x i32> %val2) { + %cmp = icmp eq <4 x i32> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %v + +; CHECK: fun177 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i16> +} + +define <4 x i32> @fun178(<4 x i32> %val1, <4 x i32> %val2) { + %cmp = icmp eq <4 x i32> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %v + +; CHECK: fun178 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <4 x i1> %cmp to <4 x i32> +} + +define <4 x i64> @fun179(<4 x i32> %val1, <4 x i32> %val2) { + 
%cmp = icmp eq <4 x i32> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %v + +; CHECK: fun179 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 5 for instruction: %v = zext <4 x i1> %cmp to <4 x i64> +} + +define <4 x i8> @fun180(<4 x i64> %val1, <4 x i64> %val2) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %v + +; CHECK: fun180 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i8> +} + +define <4 x i16> @fun181(<4 x i64> %val1, <4 x i64> %val2) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %v + +; CHECK: fun181 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i16> +} + +define <4 x i32> @fun182(<4 x i64> %val1, <4 x i64> %val2) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %v + +; CHECK: fun182 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i32> +} + +define <4 x i64> @fun183(<4 x i64> %val1, <4 x i64> %val2) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %v + +; CHECK: fun183 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i64> +} + +define <4 x i8> @fun184(<4 x float> %val1, <4 x float> %val2) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %v + +; CHECK: fun184 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i8> +} + +define <4 x i16> @fun185(<4 x float> %val1, <4 x float> 
%val2) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %v + +; CHECK: fun185 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i16> +} + +define <4 x i32> @fun186(<4 x float> %val1, <4 x float> %val2) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %v + +; CHECK: fun186 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <4 x i1> %cmp to <4 x i32> +} + +define <4 x i64> @fun187(<4 x float> %val1, <4 x float> %val2) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %v + +; CHECK: fun187 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 5 for instruction: %v = zext <4 x i1> %cmp to <4 x i64> +} + +define <4 x i8> @fun188(<4 x double> %val1, <4 x double> %val2) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %v + +; CHECK: fun188 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i8> +} + +define <4 x i16> @fun189(<4 x double> %val1, <4 x double> %val2) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %v + +; CHECK: fun189 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i16> +} + +define <4 x i32> @fun190(<4 x double> %val1, <4 x double> %val2) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %v + +; CHECK: fun190 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 
x i32> +} + +define <4 x i64> @fun191(<4 x double> %val1, <4 x double> %val2) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %v = zext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %v + +; CHECK: fun191 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <4 x i1> %cmp to <4 x i64> +} + +define <8 x i8> @fun192(<8 x i8> %val1, <8 x i8> %val2) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i8> + ret <8 x i8> %v + +; CHECK: fun192 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <8 x i1> %cmp to <8 x i8> +} + +define <8 x i16> @fun193(<8 x i8> %val1, <8 x i8> %val2) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %v + +; CHECK: fun193 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <8 x i1> %cmp to <8 x i16> +} + +define <8 x i32> @fun194(<8 x i8> %val1, <8 x i8> %val2) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %v + +; CHECK: fun194 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 7 for instruction: %v = zext <8 x i1> %cmp to <8 x i32> +} + +define <8 x i64> @fun195(<8 x i8> %val1, <8 x i8> %val2) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %v + +; CHECK: fun195 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 19 for instruction: %v = zext <8 x i1> %cmp to <8 x i64> +} + +define <8 x i8> @fun196(<8 x i16> %val1, <8 x i16> %val2) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i8> + ret <8 x i8> %v + +; CHECK: fun196 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <8 x i1> %cmp to <8 x i8> +} 
+ +define <8 x i16> @fun197(<8 x i16> %val1, <8 x i16> %val2) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %v + +; CHECK: fun197 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <8 x i1> %cmp to <8 x i16> +} + +define <8 x i32> @fun198(<8 x i16> %val1, <8 x i16> %val2) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %v + +; CHECK: fun198 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 5 for instruction: %v = zext <8 x i1> %cmp to <8 x i32> +} + +define <8 x i64> @fun199(<8 x i16> %val1, <8 x i16> %val2) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %v + +; CHECK: fun199 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 15 for instruction: %v = zext <8 x i1> %cmp to <8 x i64> +} + +define <8 x i8> @fun200(<8 x i32> %val1, <8 x i32> %val2) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i8> + ret <8 x i8> %v + +; CHECK: fun200 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <8 x i1> %cmp to <8 x i8> +} + +define <8 x i16> @fun201(<8 x i32> %val1, <8 x i32> %val2) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %v + +; CHECK: fun201 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <8 x i1> %cmp to <8 x i16> +} + +define <8 x i32> @fun202(<8 x i32> %val1, <8 x i32> %val2) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %v + +; CHECK: fun202 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <8 x i1> %cmp to <8 x i32> +} + 
+define <8 x i64> @fun203(<8 x i32> %val1, <8 x i32> %val2) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %v + +; CHECK: fun203 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 11 for instruction: %v = zext <8 x i1> %cmp to <8 x i64> +} + +define <8 x i8> @fun204(<8 x i64> %val1, <8 x i64> %val2) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i8> + ret <8 x i8> %v + +; CHECK: fun204 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <8 x i1> %cmp to <8 x i8> +} + +define <8 x i16> @fun205(<8 x i64> %val1, <8 x i64> %val2) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %v + +; CHECK: fun205 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <8 x i1> %cmp to <8 x i16> +} + +define <8 x i32> @fun206(<8 x i64> %val1, <8 x i64> %val2) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %v + +; CHECK: fun206 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <8 x i1> %cmp to <8 x i32> +} + +define <8 x i64> @fun207(<8 x i64> %val1, <8 x i64> %val2) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %v + +; CHECK: fun207 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <8 x i1> %cmp to <8 x i64> +} + +define <8 x i8> @fun208(<8 x float> %val1, <8 x float> %val2) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i8> + ret <8 x i8> %v + +; CHECK: fun208 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <8 x i1> %cmp to <8 x i8> +} 
+ +define <8 x i16> @fun209(<8 x float> %val1, <8 x float> %val2) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %v + +; CHECK: fun209 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <8 x i1> %cmp to <8 x i16> +} + +define <8 x i32> @fun210(<8 x float> %val1, <8 x float> %val2) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %v + +; CHECK: fun210 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <8 x i1> %cmp to <8 x i32> +} + +define <8 x i64> @fun211(<8 x float> %val1, <8 x float> %val2) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %v + +; CHECK: fun211 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 11 for instruction: %v = zext <8 x i1> %cmp to <8 x i64> +} + +define <8 x i8> @fun212(<8 x double> %val1, <8 x double> %val2) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i8> + ret <8 x i8> %v + +; CHECK: fun212 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <8 x i1> %cmp to <8 x i8> +} + +define <8 x i16> @fun213(<8 x double> %val1, <8 x double> %val2) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %v + +; CHECK: fun213 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <8 x i1> %cmp to <8 x i16> +} + +define <8 x i32> @fun214(<8 x double> %val1, <8 x double> %val2) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i32> + ret <8 x i32> %v + +; CHECK: fun214 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; 
CHECK: cost of 4 for instruction: %v = zext <8 x i1> %cmp to <8 x i32> +} + +define <8 x i64> @fun215(<8 x double> %val1, <8 x double> %val2) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %v = zext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %v + +; CHECK: fun215 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <8 x i1> %cmp to <8 x i64> +} + +define <16 x i8> @fun216(<16 x i8> %val1, <16 x i8> %val2) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %v + +; CHECK: fun216 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = zext <16 x i1> %cmp to <16 x i8> +} + +define <16 x i16> @fun217(<16 x i8> %val1, <16 x i8> %val2) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i16> + ret <16 x i16> %v + +; CHECK: fun217 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 5 for instruction: %v = zext <16 x i1> %cmp to <16 x i16> +} + +define <16 x i32> @fun218(<16 x i8> %val1, <16 x i8> %val2) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i32> + ret <16 x i32> %v + +; CHECK: fun218 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 15 for instruction: %v = zext <16 x i1> %cmp to <16 x i32> +} + +define <16 x i64> @fun219(<16 x i8> %val1, <16 x i8> %val2) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %v + +; CHECK: fun219 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 39 for instruction: %v = zext <16 x i1> %cmp to <16 x i64> +} + +define <16 x i8> @fun220(<16 x i16> %val1, <16 x i16> %val2) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %v + +; CHECK: fun220 +; CHECK: cost of 2 for instruction: 
%cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <16 x i1> %cmp to <16 x i8> +} + +define <16 x i16> @fun221(<16 x i16> %val1, <16 x i16> %val2) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i16> + ret <16 x i16> %v + +; CHECK: fun221 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = zext <16 x i1> %cmp to <16 x i16> +} + +define <16 x i32> @fun222(<16 x i16> %val1, <16 x i16> %val2) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i32> + ret <16 x i32> %v + +; CHECK: fun222 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 11 for instruction: %v = zext <16 x i1> %cmp to <16 x i32> +} + +define <16 x i64> @fun223(<16 x i16> %val1, <16 x i16> %val2) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %v + +; CHECK: fun223 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 31 for instruction: %v = zext <16 x i1> %cmp to <16 x i64> +} + +define <16 x i8> @fun224(<16 x i32> %val1, <16 x i32> %val2) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %v + +; CHECK: fun224 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <16 x i1> %cmp to <16 x i8> +} + +define <16 x i16> @fun225(<16 x i32> %val1, <16 x i32> %val2) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i16> + ret <16 x i16> %v + +; CHECK: fun225 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <16 x i1> %cmp to <16 x i16> +} + +define <16 x i32> @fun226(<16 x i32> %val1, <16 x i32> %val2) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i32> + ret <16 x i32> %v 
+ +; CHECK: fun226 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <16 x i1> %cmp to <16 x i32> +} + +define <16 x i64> @fun227(<16 x i32> %val1, <16 x i32> %val2) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %v + +; CHECK: fun227 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 23 for instruction: %v = zext <16 x i1> %cmp to <16 x i64> +} + +define <16 x i8> @fun228(<16 x i64> %val1, <16 x i64> %val2) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %v + +; CHECK: fun228 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 8 for instruction: %v = zext <16 x i1> %cmp to <16 x i8> +} + +define <16 x i16> @fun229(<16 x i64> %val1, <16 x i64> %val2) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i16> + ret <16 x i16> %v + +; CHECK: fun229 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 8 for instruction: %v = zext <16 x i1> %cmp to <16 x i16> +} + +define <16 x i32> @fun230(<16 x i64> %val1, <16 x i64> %val2) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i32> + ret <16 x i32> %v + +; CHECK: fun230 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 8 for instruction: %v = zext <16 x i1> %cmp to <16 x i32> +} + +define <16 x i64> @fun231(<16 x i64> %val1, <16 x i64> %val2) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %v + +; CHECK: fun231 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 8 for instruction: %v = zext <16 x i1> %cmp to <16 x i64> +} + +define <16 x i8> @fun232(<16 x float> %val1, <16 x float> %val2) { + %cmp = fcmp ogt <16 x float> %val1, %val2 
+ %v = zext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %v + +; CHECK: fun232 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <16 x i1> %cmp to <16 x i8> +} + +define <16 x i16> @fun233(<16 x float> %val1, <16 x float> %val2) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i16> + ret <16 x i16> %v + +; CHECK: fun233 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <16 x i1> %cmp to <16 x i16> +} + +define <16 x i32> @fun234(<16 x float> %val1, <16 x float> %val2) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i32> + ret <16 x i32> %v + +; CHECK: fun234 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = zext <16 x i1> %cmp to <16 x i32> +} + +define <16 x i64> @fun235(<16 x float> %val1, <16 x float> %val2) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %v + +; CHECK: fun235 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 23 for instruction: %v = zext <16 x i1> %cmp to <16 x i64> +} + +define <16 x i8> @fun236(<16 x double> %val1, <16 x double> %val2) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %v + +; CHECK: fun236 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 8 for instruction: %v = zext <16 x i1> %cmp to <16 x i8> +} + +define <16 x i16> @fun237(<16 x double> %val1, <16 x double> %val2) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i16> + ret <16 x i16> %v + +; CHECK: fun237 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 8 for instruction: %v = zext <16 x i1> %cmp to <16 
x i16> +} + +define <16 x i32> @fun238(<16 x double> %val1, <16 x double> %val2) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i32> + ret <16 x i32> %v + +; CHECK: fun238 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 8 for instruction: %v = zext <16 x i1> %cmp to <16 x i32> +} + +define <16 x i64> @fun239(<16 x double> %val1, <16 x double> %val2) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %v = zext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %v + +; CHECK: fun239 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 8 for instruction: %v = zext <16 x i1> %cmp to <16 x i64> +} + diff --git a/llvm/test/Analysis/CostModel/SystemZ/cmpsel.ll b/llvm/test/Analysis/CostModel/SystemZ/cmpsel.ll new file mode 100644 index 0000000..de72ec3 --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/cmpsel.ll @@ -0,0 +1,1987 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; +; Note: Cost estimates of select of a fp-type are somewhat arbitrary, since it +; involves a conditional jump. +; Note: Vector fp32 is not directly supported, and its estimates are not quite +; exact (but they are large in absolute value).
+ +define i8 @fun0(i8 %val1, i8 %val2, + i8 %val3, i8 %val4) { + %cmp = icmp eq i8 %val1, %val2 + %sel = select i1 %cmp, i8 %val3, i8 %val4 + ret i8 %sel + +; CHECK: fun0 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i8 %val3, i8 %val4 +} + +define i16 @fun1(i8 %val1, i8 %val2, + i16 %val3, i16 %val4) { + %cmp = icmp eq i8 %val1, %val2 + %sel = select i1 %cmp, i16 %val3, i16 %val4 + ret i16 %sel + +; CHECK: fun1 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i16 %val3, i16 %val4 +} + +define i32 @fun2(i8 %val1, i8 %val2, + i32 %val3, i32 %val4) { + %cmp = icmp eq i8 %val1, %val2 + %sel = select i1 %cmp, i32 %val3, i32 %val4 + ret i32 %sel + +; CHECK: fun2 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i32 %val3, i32 %val4 +} + +define i64 @fun3(i8 %val1, i8 %val2, + i64 %val3, i64 %val4) { + %cmp = icmp eq i8 %val1, %val2 + %sel = select i1 %cmp, i64 %val3, i64 %val4 + ret i64 %sel + +; CHECK: fun3 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %val4 +} + +define float @fun4(i8 %val1, i8 %val2, + float %val3, float %val4) { + %cmp = icmp eq i8 %val1, %val2 + %sel = select i1 %cmp, float %val3, float %val4 + ret float %sel + +; CHECK: fun4 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select i1 %cmp, float %val3, float %val4 +} + +define double @fun5(i8 %val1, i8 %val2, + double %val3, double %val4) { + %cmp = icmp eq i8 %val1, %val2 + %sel = select i1 %cmp, double %val3, double %val4 + ret double %sel + +; CHECK: fun5 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i8 %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select i1 %cmp, double %val3, double 
%val4 +} + +define i8 @fun6(i16 %val1, i16 %val2, + i8 %val3, i8 %val4) { + %cmp = icmp eq i16 %val1, %val2 + %sel = select i1 %cmp, i8 %val3, i8 %val4 + ret i8 %sel + +; CHECK: fun6 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i8 %val3, i8 %val4 +} + +define i16 @fun7(i16 %val1, i16 %val2, + i16 %val3, i16 %val4) { + %cmp = icmp eq i16 %val1, %val2 + %sel = select i1 %cmp, i16 %val3, i16 %val4 + ret i16 %sel + +; CHECK: fun7 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i16 %val3, i16 %val4 +} + +define i32 @fun8(i16 %val1, i16 %val2, + i32 %val3, i32 %val4) { + %cmp = icmp eq i16 %val1, %val2 + %sel = select i1 %cmp, i32 %val3, i32 %val4 + ret i32 %sel + +; CHECK: fun8 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i32 %val3, i32 %val4 +} + +define i64 @fun9(i16 %val1, i16 %val2, + i64 %val3, i64 %val4) { + %cmp = icmp eq i16 %val1, %val2 + %sel = select i1 %cmp, i64 %val3, i64 %val4 + ret i64 %sel + +; CHECK: fun9 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %val4 +} + +define float @fun10(i16 %val1, i16 %val2, + float %val3, float %val4) { + %cmp = icmp eq i16 %val1, %val2 + %sel = select i1 %cmp, float %val3, float %val4 + ret float %sel + +; CHECK: fun10 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select i1 %cmp, float %val3, float %val4 +} + +define double @fun11(i16 %val1, i16 %val2, + double %val3, double %val4) { + %cmp = icmp eq i16 %val1, %val2 + %sel = select i1 %cmp, double %val3, double %val4 + ret double %sel + +; CHECK: fun11 +; CHECK: cost of 3 for instruction: %cmp = icmp eq i16 %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = 
select i1 %cmp, double %val3, double %val4 +} + +define i8 @fun12(i32 %val1, i32 %val2, + i8 %val3, i8 %val4) { + %cmp = icmp eq i32 %val1, %val2 + %sel = select i1 %cmp, i8 %val3, i8 %val4 + ret i8 %sel + +; CHECK: fun12 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i8 %val3, i8 %val4 +} + +define i16 @fun13(i32 %val1, i32 %val2, + i16 %val3, i16 %val4) { + %cmp = icmp eq i32 %val1, %val2 + %sel = select i1 %cmp, i16 %val3, i16 %val4 + ret i16 %sel + +; CHECK: fun13 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i16 %val3, i16 %val4 +} + +define i32 @fun14(i32 %val1, i32 %val2, + i32 %val3, i32 %val4) { + %cmp = icmp eq i32 %val1, %val2 + %sel = select i1 %cmp, i32 %val3, i32 %val4 + ret i32 %sel + +; CHECK: fun14 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i32 %val3, i32 %val4 +} + +define i64 @fun15(i32 %val1, i32 %val2, + i64 %val3, i64 %val4) { + %cmp = icmp eq i32 %val1, %val2 + %sel = select i1 %cmp, i64 %val3, i64 %val4 + ret i64 %sel + +; CHECK: fun15 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %val4 +} + +define float @fun16(i32 %val1, i32 %val2, + float %val3, float %val4) { + %cmp = icmp eq i32 %val1, %val2 + %sel = select i1 %cmp, float %val3, float %val4 + ret float %sel + +; CHECK: fun16 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select i1 %cmp, float %val3, float %val4 +} + +define double @fun17(i32 %val1, i32 %val2, + double %val3, double %val4) { + %cmp = icmp eq i32 %val1, %val2 + %sel = select i1 %cmp, double %val3, double %val4 + ret double %sel + +; CHECK: fun17 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i32 %val1, %val2 
+; CHECK: cost of 4 for instruction: %sel = select i1 %cmp, double %val3, double %val4 +} + +define i8 @fun18(i64 %val1, i64 %val2, + i8 %val3, i8 %val4) { + %cmp = icmp eq i64 %val1, %val2 + %sel = select i1 %cmp, i8 %val3, i8 %val4 + ret i8 %sel + +; CHECK: fun18 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i8 %val3, i8 %val4 +} + +define i16 @fun19(i64 %val1, i64 %val2, + i16 %val3, i16 %val4) { + %cmp = icmp eq i64 %val1, %val2 + %sel = select i1 %cmp, i16 %val3, i16 %val4 + ret i16 %sel + +; CHECK: fun19 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i16 %val3, i16 %val4 +} + +define i32 @fun20(i64 %val1, i64 %val2, + i32 %val3, i32 %val4) { + %cmp = icmp eq i64 %val1, %val2 + %sel = select i1 %cmp, i32 %val3, i32 %val4 + ret i32 %sel + +; CHECK: fun20 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i32 %val3, i32 %val4 +} + +define i64 @fun21(i64 %val1, i64 %val2, + i64 %val3, i64 %val4) { + %cmp = icmp eq i64 %val1, %val2 + %sel = select i1 %cmp, i64 %val3, i64 %val4 + ret i64 %sel + +; CHECK: fun21 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %val4 +} + +define float @fun22(i64 %val1, i64 %val2, + float %val3, float %val4) { + %cmp = icmp eq i64 %val1, %val2 + %sel = select i1 %cmp, float %val3, float %val4 + ret float %sel + +; CHECK: fun22 +; CHECK: cost of 1 for instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select i1 %cmp, float %val3, float %val4 +} + +define double @fun23(i64 %val1, i64 %val2, + double %val3, double %val4) { + %cmp = icmp eq i64 %val1, %val2 + %sel = select i1 %cmp, double %val3, double %val4 + ret double %sel + +; CHECK: fun23 +; CHECK: cost of 1 for 
instruction: %cmp = icmp eq i64 %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select i1 %cmp, double %val3, double %val4 +} + +define <2 x i8> @fun24(<2 x i8> %val1, <2 x i8> %val2, + <2 x i8> %val3, <2 x i8> %val4) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4 + ret <2 x i8> %sel + +; CHECK: fun24 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4 +} + +define <2 x i16> @fun25(<2 x i8> %val1, <2 x i8> %val2, + <2 x i16> %val3, <2 x i16> %val4) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4 + ret <2 x i16> %sel + +; CHECK: fun25 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4 +} + +define <2 x i32> @fun26(<2 x i8> %val1, <2 x i8> %val2, + <2 x i32> %val3, <2 x i32> %val4) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4 + ret <2 x i32> %sel + +; CHECK: fun26 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 3 for instruction: %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4 +} + +define <2 x i64> @fun27(<2 x i8> %val1, <2 x i8> %val2, + <2 x i64> %val3, <2 x i64> %val4) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 + ret <2 x i64> %sel + +; CHECK: fun27 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 +} + +define <2 x float> @fun28(<2 x i8> %val1, <2 x i8> %val2, + <2 x float> %val3, <2 x float> %val4) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4 + 
ret <2 x float> %sel + +; CHECK: fun28 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 3 for instruction: %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4 +} + +define <2 x double> @fun29(<2 x i8> %val1, <2 x i8> %val2, + <2 x double> %val3, <2 x double> %val4) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %sel + +; CHECK: fun29 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 +} + +define <2 x i8> @fun30(<2 x i16> %val1, <2 x i16> %val2, + <2 x i8> %val3, <2 x i8> %val4) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4 + ret <2 x i8> %sel + +; CHECK: fun30 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4 +} + +define <2 x i16> @fun31(<2 x i16> %val1, <2 x i16> %val2, + <2 x i16> %val3, <2 x i16> %val4) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4 + ret <2 x i16> %sel + +; CHECK: fun31 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4 +} + +define <2 x i32> @fun32(<2 x i16> %val1, <2 x i16> %val2, + <2 x i32> %val3, <2 x i32> %val4) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4 + ret <2 x i32> %sel + +; CHECK: fun32 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4 +} + +define <2 x i64> @fun33(<2 x i16> %val1, <2 x i16> %val2, + <2 x i64> %val3, <2 x i64> 
%val4) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 + ret <2 x i64> %sel + +; CHECK: fun33 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 3 for instruction: %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 +} + +define <2 x float> @fun34(<2 x i16> %val1, <2 x i16> %val2, + <2 x float> %val3, <2 x float> %val4) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4 + ret <2 x float> %sel + +; CHECK: fun34 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4 +} + +define <2 x double> @fun35(<2 x i16> %val1, <2 x i16> %val2, + <2 x double> %val3, <2 x double> %val4) { + %cmp = icmp eq <2 x i16> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %sel + +; CHECK: fun35 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i16> %val1, %val2 +; CHECK: cost of 3 for instruction: %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 +} + +define <2 x i8> @fun36(<2 x i32> %val1, <2 x i32> %val2, + <2 x i8> %val3, <2 x i8> %val4) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4 + ret <2 x i8> %sel + +; CHECK: fun36 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4 +} + +define <2 x i16> @fun37(<2 x i32> %val1, <2 x i32> %val2, + <2 x i16> %val3, <2 x i16> %val4) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4 + ret <2 x i16> %sel + +; CHECK: fun37 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> 
%cmp, <2 x i16> %val3, <2 x i16> %val4 +} + +define <2 x i32> @fun38(<2 x i32> %val1, <2 x i32> %val2, + <2 x i32> %val3, <2 x i32> %val4) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4 + ret <2 x i32> %sel + +; CHECK: fun38 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4 +} + +define <2 x i64> @fun39(<2 x i32> %val1, <2 x i32> %val2, + <2 x i64> %val3, <2 x i64> %val4) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 + ret <2 x i64> %sel + +; CHECK: fun39 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 +} + +define <2 x float> @fun40(<2 x i32> %val1, <2 x i32> %val2, + <2 x float> %val3, <2 x float> %val4) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4 + ret <2 x float> %sel + +; CHECK: fun40 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4 +} + +define <2 x double> @fun41(<2 x i32> %val1, <2 x i32> %val2, + <2 x double> %val3, <2 x double> %val4) { + %cmp = icmp eq <2 x i32> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %sel + +; CHECK: fun41 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 +} + +define <2 x i8> @fun42(<2 x i64> %val1, <2 x i64> %val2, + <2 x i8> %val3, <2 x i8> %val4) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4 + ret <2 x i8> %sel + +; CHECK: fun42 +; 
CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4 +} + +define <2 x i16> @fun43(<2 x i64> %val1, <2 x i64> %val2, + <2 x i16> %val3, <2 x i16> %val4) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4 + ret <2 x i16> %sel + +; CHECK: fun43 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4 +} + +define <2 x i32> @fun44(<2 x i64> %val1, <2 x i64> %val2, + <2 x i32> %val3, <2 x i32> %val4) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4 + ret <2 x i32> %sel + +; CHECK: fun44 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4 +} + +define <2 x i64> @fun45(<2 x i64> %val1, <2 x i64> %val2, + <2 x i64> %val3, <2 x i64> %val4) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 + ret <2 x i64> %sel + +; CHECK: fun45 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 +} + +define <2 x float> @fun46(<2 x i64> %val1, <2 x i64> %val2, + <2 x float> %val3, <2 x float> %val4) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4 + ret <2 x float> %sel + +; CHECK: fun46 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4 +} + +define <2 x double> @fun47(<2 x i64> %val1, <2 x i64> %val2, + <2 x double> %val3, <2 x double> %val4) { + %cmp = icmp eq <2 x i64> 
%val1, %val2 + %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %sel + +; CHECK: fun47 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 +} + +define <4 x i8> @fun48(<4 x i8> %val1, <4 x i8> %val2, + <4 x i8> %val3, <4 x i8> %val4) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4 + ret <4 x i8> %sel + +; CHECK: fun48 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4 +} + +define <4 x i16> @fun49(<4 x i8> %val1, <4 x i8> %val2, + <4 x i16> %val3, <4 x i16> %val4) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4 + ret <4 x i16> %sel + +; CHECK: fun49 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4 +} + +define <4 x i32> @fun50(<4 x i8> %val1, <4 x i8> %val2, + <4 x i32> %val3, <4 x i32> %val4) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 + ret <4 x i32> %sel + +; CHECK: fun50 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 3 for instruction: %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 +} + +define <4 x i64> @fun51(<4 x i8> %val1, <4 x i8> %val2, + <4 x i64> %val3, <4 x i64> %val4) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4 + ret <4 x i64> %sel + +; CHECK: fun51 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 9 for instruction: %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4 +} + +define <4 x float> @fun52(<4 x i8> 
%val1, <4 x i8> %val2, + <4 x float> %val3, <4 x float> %val4) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %sel + +; CHECK: fun52 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 3 for instruction: %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 +} + +define <4 x double> @fun53(<4 x i8> %val1, <4 x i8> %val2, + <4 x double> %val3, <4 x double> %val4) { + %cmp = icmp eq <4 x i8> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4 + ret <4 x double> %sel + +; CHECK: fun53 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i8> %val1, %val2 +; CHECK: cost of 9 for instruction: %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4 +} + +define <4 x i8> @fun54(<4 x i16> %val1, <4 x i16> %val2, + <4 x i8> %val3, <4 x i8> %val4) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4 + ret <4 x i8> %sel + +; CHECK: fun54 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4 +} + +define <4 x i16> @fun55(<4 x i16> %val1, <4 x i16> %val2, + <4 x i16> %val3, <4 x i16> %val4) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4 + ret <4 x i16> %sel + +; CHECK: fun55 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4 +} + +define <4 x i32> @fun56(<4 x i16> %val1, <4 x i16> %val2, + <4 x i32> %val3, <4 x i32> %val4) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 + ret <4 x i32> %sel + +; CHECK: fun56 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 2 
for instruction: %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 +} + +define <4 x i64> @fun57(<4 x i16> %val1, <4 x i16> %val2, + <4 x i64> %val3, <4 x i64> %val4) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4 + ret <4 x i64> %sel + +; CHECK: fun57 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 7 for instruction: %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4 +} + +define <4 x float> @fun58(<4 x i16> %val1, <4 x i16> %val2, + <4 x float> %val3, <4 x float> %val4) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %sel + +; CHECK: fun58 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 +} + +define <4 x double> @fun59(<4 x i16> %val1, <4 x i16> %val2, + <4 x double> %val3, <4 x double> %val4) { + %cmp = icmp eq <4 x i16> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4 + ret <4 x double> %sel + +; CHECK: fun59 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i16> %val1, %val2 +; CHECK: cost of 7 for instruction: %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4 +} + +define <4 x i8> @fun60(<4 x i32> %val1, <4 x i32> %val2, + <4 x i8> %val3, <4 x i8> %val4) { + %cmp = icmp eq <4 x i32> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4 + ret <4 x i8> %sel + +; CHECK: fun60 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4 +} + +define <4 x i16> @fun61(<4 x i32> %val1, <4 x i32> %val2, + <4 x i16> %val3, <4 x i16> %val4) { + %cmp = icmp eq <4 x i32> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4 + ret 
<4 x i16> %sel + +; CHECK: fun61 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4 +} + +define <4 x i32> @fun62(<4 x i32> %val1, <4 x i32> %val2, + <4 x i32> %val3, <4 x i32> %val4) { + %cmp = icmp eq <4 x i32> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 + ret <4 x i32> %sel + +; CHECK: fun62 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 +} + +define <4 x i64> @fun63(<4 x i32> %val1, <4 x i32> %val2, + <4 x i64> %val3, <4 x i64> %val4) { + %cmp = icmp eq <4 x i32> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4 + ret <4 x i64> %sel + +; CHECK: fun63 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 5 for instruction: %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4 +} + +define <4 x float> @fun64(<4 x i32> %val1, <4 x i32> %val2, + <4 x float> %val3, <4 x float> %val4) { + %cmp = icmp eq <4 x i32> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %sel + +; CHECK: fun64 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 +} + +define <4 x double> @fun65(<4 x i32> %val1, <4 x i32> %val2, + <4 x double> %val3, <4 x double> %val4) { + %cmp = icmp eq <4 x i32> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4 + ret <4 x double> %sel + +; CHECK: fun65 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <4 x i32> %val1, %val2 +; CHECK: cost of 5 for instruction: %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4 +} + +define <4 x i8> @fun66(<4 x i64> %val1, <4 x i64> %val2, + <4 x i8> 
%val3, <4 x i8> %val4) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4 + ret <4 x i8> %sel + +; CHECK: fun66 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4 +} + +define <4 x i16> @fun67(<4 x i64> %val1, <4 x i64> %val2, + <4 x i16> %val3, <4 x i16> %val4) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4 + ret <4 x i16> %sel + +; CHECK: fun67 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4 +} + +define <4 x i32> @fun68(<4 x i64> %val1, <4 x i64> %val2, + <4 x i32> %val3, <4 x i32> %val4) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 + ret <4 x i32> %sel + +; CHECK: fun68 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 +} + +define <4 x i64> @fun69(<4 x i64> %val1, <4 x i64> %val2, + <4 x i64> %val3, <4 x i64> %val4) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4 + ret <4 x i64> %sel + +; CHECK: fun69 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4 +} + +define <4 x float> @fun70(<4 x i64> %val1, <4 x i64> %val2, + <4 x float> %val3, <4 x float> %val4) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %sel + +; CHECK: fun70 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x 
float> %val3, <4 x float> %val4 +} + +define <4 x double> @fun71(<4 x i64> %val1, <4 x i64> %val2, + <4 x double> %val3, <4 x double> %val4) { + %cmp = icmp eq <4 x i64> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4 + ret <4 x double> %sel + +; CHECK: fun71 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <4 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4 +} + +define <8 x i8> @fun72(<8 x i8> %val1, <8 x i8> %val2, + <8 x i8> %val3, <8 x i8> %val4) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4 + ret <8 x i8> %sel + +; CHECK: fun72 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4 +} + +define <8 x i16> @fun73(<8 x i8> %val1, <8 x i8> %val2, + <8 x i16> %val3, <8 x i16> %val4) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 + ret <8 x i16> %sel + +; CHECK: fun73 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 +} + +define <8 x i32> @fun74(<8 x i8> %val1, <8 x i8> %val2, + <8 x i32> %val3, <8 x i32> %val4) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4 + ret <8 x i32> %sel + +; CHECK: fun74 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 7 for instruction: %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4 +} + +define <8 x i64> @fun75(<8 x i8> %val1, <8 x i8> %val2, + <8 x i64> %val3, <8 x i64> %val4) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4 + ret <8 x i64> %sel + +; CHECK: fun75 +; CHECK: cost of 1 for instruction: %cmp = 
icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 19 for instruction: %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4 +} + +define <8 x float> @fun76(<8 x i8> %val1, <8 x i8> %val2, + <8 x float> %val3, <8 x float> %val4) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4 + ret <8 x float> %sel + +; CHECK: fun76 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 7 for instruction: %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4 +} + +define <8 x double> @fun77(<8 x i8> %val1, <8 x i8> %val2, + <8 x double> %val3, <8 x double> %val4) { + %cmp = icmp eq <8 x i8> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4 + ret <8 x double> %sel + +; CHECK: fun77 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i8> %val1, %val2 +; CHECK: cost of 19 for instruction: %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4 +} + +define <8 x i8> @fun78(<8 x i16> %val1, <8 x i16> %val2, + <8 x i8> %val3, <8 x i8> %val4) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4 + ret <8 x i8> %sel + +; CHECK: fun78 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4 +} + +define <8 x i16> @fun79(<8 x i16> %val1, <8 x i16> %val2, + <8 x i16> %val3, <8 x i16> %val4) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 + ret <8 x i16> %sel + +; CHECK: fun79 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 +} + +define <8 x i32> @fun80(<8 x i16> %val1, <8 x i16> %val2, + <8 x i32> %val3, <8 x i32> %val4) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %sel = select <8 x i1> 
%cmp, <8 x i32> %val3, <8 x i32> %val4 + ret <8 x i32> %sel + +; CHECK: fun80 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 5 for instruction: %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4 +} + +define <8 x i64> @fun81(<8 x i16> %val1, <8 x i16> %val2, + <8 x i64> %val3, <8 x i64> %val4) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4 + ret <8 x i64> %sel + +; CHECK: fun81 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 15 for instruction: %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4 +} + +define <8 x float> @fun82(<8 x i16> %val1, <8 x i16> %val2, + <8 x float> %val3, <8 x float> %val4) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4 + ret <8 x float> %sel + +; CHECK: fun82 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 5 for instruction: %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4 +} + +define <8 x double> @fun83(<8 x i16> %val1, <8 x i16> %val2, + <8 x double> %val3, <8 x double> %val4) { + %cmp = icmp eq <8 x i16> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4 + ret <8 x double> %sel + +; CHECK: fun83 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <8 x i16> %val1, %val2 +; CHECK: cost of 15 for instruction: %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4 +} + +define <8 x i8> @fun84(<8 x i32> %val1, <8 x i32> %val2, + <8 x i8> %val3, <8 x i8> %val4) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4 + ret <8 x i8> %sel + +; CHECK: fun84 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4 +} + +define <8 x i16> @fun85(<8 x 
i32> %val1, <8 x i32> %val2, + <8 x i16> %val3, <8 x i16> %val4) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 + ret <8 x i16> %sel + +; CHECK: fun85 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 +} + +define <8 x i32> @fun86(<8 x i32> %val1, <8 x i32> %val2, + <8 x i32> %val3, <8 x i32> %val4) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4 + ret <8 x i32> %sel + +; CHECK: fun86 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4 +} + +define <8 x i64> @fun87(<8 x i32> %val1, <8 x i32> %val2, + <8 x i64> %val3, <8 x i64> %val4) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4 + ret <8 x i64> %sel + +; CHECK: fun87 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 11 for instruction: %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4 +} + +define <8 x float> @fun88(<8 x i32> %val1, <8 x i32> %val2, + <8 x float> %val3, <8 x float> %val4) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4 + ret <8 x float> %sel + +; CHECK: fun88 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4 +} + +define <8 x double> @fun89(<8 x i32> %val1, <8 x i32> %val2, + <8 x double> %val3, <8 x double> %val4) { + %cmp = icmp eq <8 x i32> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4 + ret <8 x double> %sel + +; CHECK: fun89 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <8 x i32> %val1, %val2 +; 
CHECK: cost of 11 for instruction: %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4 +} + +define <8 x i8> @fun90(<8 x i64> %val1, <8 x i64> %val2, + <8 x i8> %val3, <8 x i8> %val4) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4 + ret <8 x i8> %sel + +; CHECK: fun90 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4 +} + +define <8 x i16> @fun91(<8 x i64> %val1, <8 x i64> %val2, + <8 x i16> %val3, <8 x i16> %val4) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 + ret <8 x i16> %sel + +; CHECK: fun91 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 +} + +define <8 x i32> @fun92(<8 x i64> %val1, <8 x i64> %val2, + <8 x i32> %val3, <8 x i32> %val4) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4 + ret <8 x i32> %sel + +; CHECK: fun92 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4 +} + +define <8 x i64> @fun93(<8 x i64> %val1, <8 x i64> %val2, + <8 x i64> %val3, <8 x i64> %val4) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4 + ret <8 x i64> %sel + +; CHECK: fun93 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4 +} + +define <8 x float> @fun94(<8 x i64> %val1, <8 x i64> %val2, + <8 x float> %val3, <8 x float> %val4) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4 + ret <8 x 
float> %sel + +; CHECK: fun94 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4 +} + +define <8 x double> @fun95(<8 x i64> %val1, <8 x i64> %val2, + <8 x double> %val3, <8 x double> %val4) { + %cmp = icmp eq <8 x i64> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4 + ret <8 x double> %sel + +; CHECK: fun95 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <8 x i64> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4 +} + +define <16 x i8> @fun96(<16 x i8> %val1, <16 x i8> %val2, + <16 x i8> %val3, <16 x i8> %val4) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 + ret <16 x i8> %sel + +; CHECK: fun96 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 +} + +define <16 x i16> @fun97(<16 x i8> %val1, <16 x i8> %val2, + <16 x i16> %val3, <16 x i16> %val4) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4 + ret <16 x i16> %sel + +; CHECK: fun97 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 5 for instruction: %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4 +} + +define <16 x i32> @fun98(<16 x i8> %val1, <16 x i8> %val2, + <16 x i32> %val3, <16 x i32> %val4) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4 + ret <16 x i32> %sel + +; CHECK: fun98 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 15 for instruction: %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4 +} + +define <16 x i64> @fun99(<16 x i8> %val1, <16 x i8> %val2, + 
<16 x i64> %val3, <16 x i64> %val4) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4 + ret <16 x i64> %sel + +; CHECK: fun99 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 39 for instruction: %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4 +} + +define <16 x float> @fun100(<16 x i8> %val1, <16 x i8> %val2, + <16 x float> %val3, <16 x float> %val4) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4 + ret <16 x float> %sel + +; CHECK: fun100 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 15 for instruction: %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4 +} + +define <16 x double> @fun101(<16 x i8> %val1, <16 x i8> %val2, + <16 x double> %val3, <16 x double> %val4) { + %cmp = icmp eq <16 x i8> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4 + ret <16 x double> %sel + +; CHECK: fun101 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <16 x i8> %val1, %val2 +; CHECK: cost of 39 for instruction: %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4 +} + +define <16 x i8> @fun102(<16 x i16> %val1, <16 x i16> %val2, + <16 x i8> %val3, <16 x i8> %val4) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 + ret <16 x i8> %sel + +; CHECK: fun102 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 +} + +define <16 x i16> @fun103(<16 x i16> %val1, <16 x i16> %val2, + <16 x i16> %val3, <16 x i16> %val4) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4 + ret <16 x i16> %sel + +; CHECK: fun103 +; CHECK: cost of 2 for instruction: %cmp = icmp 
eq <16 x i16> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4 +} + +define <16 x i32> @fun104(<16 x i16> %val1, <16 x i16> %val2, + <16 x i32> %val3, <16 x i32> %val4) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4 + ret <16 x i32> %sel + +; CHECK: fun104 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 11 for instruction: %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4 +} + +define <16 x i64> @fun105(<16 x i16> %val1, <16 x i16> %val2, + <16 x i64> %val3, <16 x i64> %val4) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4 + ret <16 x i64> %sel + +; CHECK: fun105 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 31 for instruction: %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4 +} + +define <16 x float> @fun106(<16 x i16> %val1, <16 x i16> %val2, + <16 x float> %val3, <16 x float> %val4) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4 + ret <16 x float> %sel + +; CHECK: fun106 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 11 for instruction: %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4 +} + +define <16 x double> @fun107(<16 x i16> %val1, <16 x i16> %val2, + <16 x double> %val3, <16 x double> %val4) { + %cmp = icmp eq <16 x i16> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4 + ret <16 x double> %sel + +; CHECK: fun107 +; CHECK: cost of 2 for instruction: %cmp = icmp eq <16 x i16> %val1, %val2 +; CHECK: cost of 31 for instruction: %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4 +} + +define <16 x i8> @fun108(<16 x i32> %val1, <16 x i32> %val2, + <16 x i8> %val3, 
<16 x i8> %val4) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 + ret <16 x i8> %sel + +; CHECK: fun108 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 +} + +define <16 x i16> @fun109(<16 x i32> %val1, <16 x i32> %val2, + <16 x i16> %val3, <16 x i16> %val4) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4 + ret <16 x i16> %sel + +; CHECK: fun109 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4 +} + +define <16 x i32> @fun110(<16 x i32> %val1, <16 x i32> %val2, + <16 x i32> %val3, <16 x i32> %val4) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4 + ret <16 x i32> %sel + +; CHECK: fun110 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4 +} + +define <16 x i64> @fun111(<16 x i32> %val1, <16 x i32> %val2, + <16 x i64> %val3, <16 x i64> %val4) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4 + ret <16 x i64> %sel + +; CHECK: fun111 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 23 for instruction: %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4 +} + +define <16 x float> @fun112(<16 x i32> %val1, <16 x i32> %val2, + <16 x float> %val3, <16 x float> %val4) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4 + ret <16 x float> %sel + +; CHECK: fun112 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; 
CHECK: cost of 4 for instruction: %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4 +} + +define <16 x double> @fun113(<16 x i32> %val1, <16 x i32> %val2, + <16 x double> %val3, <16 x double> %val4) { + %cmp = icmp eq <16 x i32> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4 + ret <16 x double> %sel + +; CHECK: fun113 +; CHECK: cost of 4 for instruction: %cmp = icmp eq <16 x i32> %val1, %val2 +; CHECK: cost of 23 for instruction: %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4 +} + +define <16 x i8> @fun114(<16 x i64> %val1, <16 x i64> %val2, + <16 x i8> %val3, <16 x i8> %val4) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 + ret <16 x i8> %sel + +; CHECK: fun114 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 8 for instruction: %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 +} + +define <16 x i16> @fun115(<16 x i64> %val1, <16 x i64> %val2, + <16 x i16> %val3, <16 x i16> %val4) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4 + ret <16 x i16> %sel + +; CHECK: fun115 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 8 for instruction: %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4 +} + +define <16 x i32> @fun116(<16 x i64> %val1, <16 x i64> %val2, + <16 x i32> %val3, <16 x i32> %val4) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4 + ret <16 x i32> %sel + +; CHECK: fun116 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 8 for instruction: %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4 +} + +define <16 x i64> @fun117(<16 x i64> %val1, <16 x i64> %val2, + <16 x i64> %val3, <16 x i64> %val4) { + %cmp = icmp eq <16 x i64> 
%val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4 + ret <16 x i64> %sel + +; CHECK: fun117 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 8 for instruction: %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4 +} + +define <16 x float> @fun118(<16 x i64> %val1, <16 x i64> %val2, + <16 x float> %val3, <16 x float> %val4) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4 + ret <16 x float> %sel + +; CHECK: fun118 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 8 for instruction: %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4 +} + +define <16 x double> @fun119(<16 x i64> %val1, <16 x i64> %val2, + <16 x double> %val3, <16 x double> %val4) { + %cmp = icmp eq <16 x i64> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4 + ret <16 x double> %sel + +; CHECK: fun119 +; CHECK: cost of 8 for instruction: %cmp = icmp eq <16 x i64> %val1, %val2 +; CHECK: cost of 8 for instruction: %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4 +} + +define i8 @fun120(float %val1, float %val2, + i8 %val3, i8 %val4) { + %cmp = fcmp ogt float %val1, %val2 + %sel = select i1 %cmp, i8 %val3, i8 %val4 + ret i8 %sel + +; CHECK: fun120 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i8 %val3, i8 %val4 +} + +define i16 @fun121(float %val1, float %val2, + i16 %val3, i16 %val4) { + %cmp = fcmp ogt float %val1, %val2 + %sel = select i1 %cmp, i16 %val3, i16 %val4 + ret i16 %sel + +; CHECK: fun121 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i16 %val3, i16 %val4 +} + +define i32 @fun122(float %val1, float %val2, + i32 %val3, i32 %val4) { + %cmp = fcmp ogt float 
%val1, %val2 + %sel = select i1 %cmp, i32 %val3, i32 %val4 + ret i32 %sel + +; CHECK: fun122 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i32 %val3, i32 %val4 +} + +define i64 @fun123(float %val1, float %val2, + i64 %val3, i64 %val4) { + %cmp = fcmp ogt float %val1, %val2 + %sel = select i1 %cmp, i64 %val3, i64 %val4 + ret i64 %sel + +; CHECK: fun123 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %val4 +} + +define float @fun124(float %val1, float %val2, + float %val3, float %val4) { + %cmp = fcmp ogt float %val1, %val2 + %sel = select i1 %cmp, float %val3, float %val4 + ret float %sel + +; CHECK: fun124 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select i1 %cmp, float %val3, float %val4 +} + +define double @fun125(float %val1, float %val2, + double %val3, double %val4) { + %cmp = fcmp ogt float %val1, %val2 + %sel = select i1 %cmp, double %val3, double %val4 + ret double %sel + +; CHECK: fun125 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt float %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select i1 %cmp, double %val3, double %val4 +} + +define i8 @fun126(double %val1, double %val2, + i8 %val3, i8 %val4) { + %cmp = fcmp ogt double %val1, %val2 + %sel = select i1 %cmp, i8 %val3, i8 %val4 + ret i8 %sel + +; CHECK: fun126 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i8 %val3, i8 %val4 +} + +define i16 @fun127(double %val1, double %val2, + i16 %val3, i16 %val4) { + %cmp = fcmp ogt double %val1, %val2 + %sel = select i1 %cmp, i16 %val3, i16 %val4 + ret i16 %sel + +; CHECK: fun127 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, 
i16 %val3, i16 %val4 +} + +define i32 @fun128(double %val1, double %val2, + i32 %val3, i32 %val4) { + %cmp = fcmp ogt double %val1, %val2 + %sel = select i1 %cmp, i32 %val3, i32 %val4 + ret i32 %sel + +; CHECK: fun128 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i32 %val3, i32 %val4 +} + +define i64 @fun129(double %val1, double %val2, + i64 %val3, i64 %val4) { + %cmp = fcmp ogt double %val1, %val2 + %sel = select i1 %cmp, i64 %val3, i64 %val4 + ret i64 %sel + +; CHECK: fun129 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %val4 +} + +define float @fun130(double %val1, double %val2, + float %val3, float %val4) { + %cmp = fcmp ogt double %val1, %val2 + %sel = select i1 %cmp, float %val3, float %val4 + ret float %sel + +; CHECK: fun130 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select i1 %cmp, float %val3, float %val4 +} + +define double @fun131(double %val1, double %val2, + double %val3, double %val4) { + %cmp = fcmp ogt double %val1, %val2 + %sel = select i1 %cmp, double %val3, double %val4 + ret double %sel + +; CHECK: fun131 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt double %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select i1 %cmp, double %val3, double %val4 +} + +define <2 x i8> @fun132(<2 x float> %val1, <2 x float> %val2, + <2 x i8> %val3, <2 x i8> %val4) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4 + ret <2 x i8> %sel + +; CHECK: fun132 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4 +} + +define <2 x i16> @fun133(<2 x float> %val1, <2 x float> %val2, + <2 x i16> %val3, <2 x i16> %val4) { + 
%cmp = fcmp ogt <2 x float> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4 + ret <2 x i16> %sel + +; CHECK: fun133 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4 +} + +define <2 x i32> @fun134(<2 x float> %val1, <2 x float> %val2, + <2 x i32> %val3, <2 x i32> %val4) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4 + ret <2 x i32> %sel + +; CHECK: fun134 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4 +} + +define <2 x i64> @fun135(<2 x float> %val1, <2 x float> %val2, + <2 x i64> %val3, <2 x i64> %val4) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 + ret <2 x i64> %sel + +; CHECK: fun135 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 +} + +define <2 x float> @fun136(<2 x float> %val1, <2 x float> %val2, + <2 x float> %val3, <2 x float> %val4) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4 + ret <2 x float> %sel + +; CHECK: fun136 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4 +} + +define <2 x double> @fun137(<2 x float> %val1, <2 x float> %val2, + <2 x double> %val3, <2 x double> %val4) { + %cmp = fcmp ogt <2 x float> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %sel + +; CHECK: fun137 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <2 x float> %val1, %val2 +; CHECK: 
cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 +} + +define <2 x i8> @fun138(<2 x double> %val1, <2 x double> %val2, + <2 x i8> %val3, <2 x i8> %val4) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4 + ret <2 x i8> %sel + +; CHECK: fun138 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i8> %val3, <2 x i8> %val4 +} + +define <2 x i16> @fun139(<2 x double> %val1, <2 x double> %val2, + <2 x i16> %val3, <2 x i16> %val4) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4 + ret <2 x i16> %sel + +; CHECK: fun139 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i16> %val3, <2 x i16> %val4 +} + +define <2 x i32> @fun140(<2 x double> %val1, <2 x double> %val2, + <2 x i32> %val3, <2 x i32> %val4) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4 + ret <2 x i32> %sel + +; CHECK: fun140 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x i32> %val3, <2 x i32> %val4 +} + +define <2 x i64> @fun141(<2 x double> %val1, <2 x double> %val2, + <2 x i64> %val3, <2 x i64> %val4) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 + ret <2 x i64> %sel + +; CHECK: fun141 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 +} + +define <2 x float> @fun142(<2 x double> %val1, <2 x double> %val2, + <2 x float> %val3, <2 x float> %val4) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %sel = 
select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4 + ret <2 x float> %sel + +; CHECK: fun142 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <2 x i1> %cmp, <2 x float> %val3, <2 x float> %val4 +} + +define <2 x double> @fun143(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { + %cmp = fcmp ogt <2 x double> %val1, %val2 + %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %sel + +; CHECK: fun143 +; CHECK: cost of 1 for instruction: %cmp = fcmp ogt <2 x double> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 +} + +define <4 x i8> @fun144(<4 x float> %val1, <4 x float> %val2, + <4 x i8> %val3, <4 x i8> %val4) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4 + ret <4 x i8> %sel + +; CHECK: fun144 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4 +} + +define <4 x i16> @fun145(<4 x float> %val1, <4 x float> %val2, + <4 x i16> %val3, <4 x i16> %val4) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4 + ret <4 x i16> %sel + +; CHECK: fun145 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4 +} + +define <4 x i32> @fun146(<4 x float> %val1, <4 x float> %val2, + <4 x i32> %val3, <4 x i32> %val4) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 + ret <4 x i32> %sel + +; CHECK: fun146 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <4 x i1> %cmp, <4 
x i32> %val3, <4 x i32> %val4 +} + +define <4 x i64> @fun147(<4 x float> %val1, <4 x float> %val2, + <4 x i64> %val3, <4 x i64> %val4) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4 + ret <4 x i64> %sel + +; CHECK: fun147 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 5 for instruction: %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4 +} + +define <4 x float> @fun148(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %sel + +; CHECK: fun148 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 1 for instruction: %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 +} + +define <4 x double> @fun149(<4 x float> %val1, <4 x float> %val2, + <4 x double> %val3, <4 x double> %val4) { + %cmp = fcmp ogt <4 x float> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4 + ret <4 x double> %sel + +; CHECK: fun149 +; CHECK: cost of 10 for instruction: %cmp = fcmp ogt <4 x float> %val1, %val2 +; CHECK: cost of 5 for instruction: %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4 +} + +define <4 x i8> @fun150(<4 x double> %val1, <4 x double> %val2, + <4 x i8> %val3, <4 x i8> %val4) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4 + ret <4 x i8> %sel + +; CHECK: fun150 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i8> %val3, <4 x i8> %val4 +} + +define <4 x i16> @fun151(<4 x double> %val1, <4 x double> %val2, + <4 x i16> %val3, <4 x i16> %val4) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x 
i16> %val4 + ret <4 x i16> %sel + +; CHECK: fun151 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i16> %val3, <4 x i16> %val4 +} + +define <4 x i32> @fun152(<4 x double> %val1, <4 x double> %val2, + <4 x i32> %val3, <4 x i32> %val4) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 + ret <4 x i32> %sel + +; CHECK: fun152 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 +} + +define <4 x i64> @fun153(<4 x double> %val1, <4 x double> %val2, + <4 x i64> %val3, <4 x i64> %val4) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4 + ret <4 x i64> %sel + +; CHECK: fun153 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x i64> %val3, <4 x i64> %val4 +} + +define <4 x float> @fun154(<4 x double> %val1, <4 x double> %val2, + <4 x float> %val3, <4 x float> %val4) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %sel + +; CHECK: fun154 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 +} + +define <4 x double> @fun155(<4 x double> %val1, <4 x double> %val2, + <4 x double> %val3, <4 x double> %val4) { + %cmp = fcmp ogt <4 x double> %val1, %val2 + %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x double> %val4 + ret <4 x double> %sel + +; CHECK: fun155 +; CHECK: cost of 2 for instruction: %cmp = fcmp ogt <4 x double> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <4 x i1> %cmp, <4 x double> %val3, <4 x 
double> %val4 +} + +define <8 x i8> @fun156(<8 x float> %val1, <8 x float> %val2, + <8 x i8> %val3, <8 x i8> %val4) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4 + ret <8 x i8> %sel + +; CHECK: fun156 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4 +} + +define <8 x i16> @fun157(<8 x float> %val1, <8 x float> %val2, + <8 x i16> %val3, <8 x i16> %val4) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 + ret <8 x i16> %sel + +; CHECK: fun157 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 +} + +define <8 x i32> @fun158(<8 x float> %val1, <8 x float> %val2, + <8 x i32> %val3, <8 x i32> %val4) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4 + ret <8 x i32> %sel + +; CHECK: fun158 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4 +} + +define <8 x i64> @fun159(<8 x float> %val1, <8 x float> %val2, + <8 x i64> %val3, <8 x i64> %val4) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4 + ret <8 x i64> %sel + +; CHECK: fun159 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 11 for instruction: %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4 +} + +define <8 x float> @fun160(<8 x float> %val1, <8 x float> %val2, + <8 x float> %val3, <8 x float> %val4) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4 + ret <8 x float> %sel + +; CHECK: 
fun160 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 2 for instruction: %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4 +} + +define <8 x double> @fun161(<8 x float> %val1, <8 x float> %val2, + <8 x double> %val3, <8 x double> %val4) { + %cmp = fcmp ogt <8 x float> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4 + ret <8 x double> %sel + +; CHECK: fun161 +; CHECK: cost of 20 for instruction: %cmp = fcmp ogt <8 x float> %val1, %val2 +; CHECK: cost of 11 for instruction: %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4 +} + +define <8 x i8> @fun162(<8 x double> %val1, <8 x double> %val2, + <8 x i8> %val3, <8 x i8> %val4) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4 + ret <8 x i8> %sel + +; CHECK: fun162 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <8 x i1> %cmp, <8 x i8> %val3, <8 x i8> %val4 +} + +define <8 x i16> @fun163(<8 x double> %val1, <8 x double> %val2, + <8 x i16> %val3, <8 x i16> %val4) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 + ret <8 x i16> %sel + +; CHECK: fun163 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 +} + +define <8 x i32> @fun164(<8 x double> %val1, <8 x double> %val2, + <8 x i32> %val3, <8 x i32> %val4) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4 + ret <8 x i32> %sel + +; CHECK: fun164 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <8 x i1> %cmp, <8 x i32> %val3, <8 x i32> %val4 +} + +define <8 x i64> @fun165(<8 x double> %val1, <8 x 
double> %val2, + <8 x i64> %val3, <8 x i64> %val4) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4 + ret <8 x i64> %sel + +; CHECK: fun165 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <8 x i1> %cmp, <8 x i64> %val3, <8 x i64> %val4 +} + +define <8 x float> @fun166(<8 x double> %val1, <8 x double> %val2, + <8 x float> %val3, <8 x float> %val4) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4 + ret <8 x float> %sel + +; CHECK: fun166 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <8 x i1> %cmp, <8 x float> %val3, <8 x float> %val4 +} + +define <8 x double> @fun167(<8 x double> %val1, <8 x double> %val2, + <8 x double> %val3, <8 x double> %val4) { + %cmp = fcmp ogt <8 x double> %val1, %val2 + %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4 + ret <8 x double> %sel + +; CHECK: fun167 +; CHECK: cost of 4 for instruction: %cmp = fcmp ogt <8 x double> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <8 x i1> %cmp, <8 x double> %val3, <8 x double> %val4 +} + +define <16 x i8> @fun168(<16 x float> %val1, <16 x float> %val2, + <16 x i8> %val3, <16 x i8> %val4) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 + ret <16 x i8> %sel + +; CHECK: fun168 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 +} + +define <16 x i16> @fun169(<16 x float> %val1, <16 x float> %val2, + <16 x i16> %val3, <16 x i16> %val4) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4 + ret <16 x i16> %sel + +; CHECK: fun169 +; 
CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4 +} + +define <16 x i32> @fun170(<16 x float> %val1, <16 x float> %val2, + <16 x i32> %val3, <16 x i32> %val4) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4 + ret <16 x i32> %sel + +; CHECK: fun170 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4 +} + +define <16 x i64> @fun171(<16 x float> %val1, <16 x float> %val2, + <16 x i64> %val3, <16 x i64> %val4) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4 + ret <16 x i64> %sel + +; CHECK: fun171 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 23 for instruction: %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4 +} + +define <16 x float> @fun172(<16 x float> %val1, <16 x float> %val2, + <16 x float> %val3, <16 x float> %val4) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4 + ret <16 x float> %sel + +; CHECK: fun172 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 4 for instruction: %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4 +} + +define <16 x double> @fun173(<16 x float> %val1, <16 x float> %val2, + <16 x double> %val3, <16 x double> %val4) { + %cmp = fcmp ogt <16 x float> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4 + ret <16 x double> %sel + +; CHECK: fun173 +; CHECK: cost of 40 for instruction: %cmp = fcmp ogt <16 x float> %val1, %val2 +; CHECK: cost of 23 for instruction: %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> 
%val4 +} + +define <16 x i8> @fun174(<16 x double> %val1, <16 x double> %val2, + <16 x i8> %val3, <16 x i8> %val4) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 + ret <16 x i8> %sel + +; CHECK: fun174 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 8 for instruction: %sel = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 +} + +define <16 x i16> @fun175(<16 x double> %val1, <16 x double> %val2, + <16 x i16> %val3, <16 x i16> %val4) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4 + ret <16 x i16> %sel + +; CHECK: fun175 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 8 for instruction: %sel = select <16 x i1> %cmp, <16 x i16> %val3, <16 x i16> %val4 +} + +define <16 x i32> @fun176(<16 x double> %val1, <16 x double> %val2, + <16 x i32> %val3, <16 x i32> %val4) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4 + ret <16 x i32> %sel + +; CHECK: fun176 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 8 for instruction: %sel = select <16 x i1> %cmp, <16 x i32> %val3, <16 x i32> %val4 +} + +define <16 x i64> @fun177(<16 x double> %val1, <16 x double> %val2, + <16 x i64> %val3, <16 x i64> %val4) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4 + ret <16 x i64> %sel + +; CHECK: fun177 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 8 for instruction: %sel = select <16 x i1> %cmp, <16 x i64> %val3, <16 x i64> %val4 +} + +define <16 x float> @fun178(<16 x double> %val1, <16 x double> %val2, + <16 x float> %val3, <16 x float> %val4) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x 
float> %val3, <16 x float> %val4 + ret <16 x float> %sel + +; CHECK: fun178 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 8 for instruction: %sel = select <16 x i1> %cmp, <16 x float> %val3, <16 x float> %val4 +} + +define <16 x double> @fun179(<16 x double> %val1, <16 x double> %val2, + <16 x double> %val3, <16 x double> %val4) { + %cmp = fcmp ogt <16 x double> %val1, %val2 + %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4 + ret <16 x double> %sel + +; CHECK: fun179 +; CHECK: cost of 8 for instruction: %cmp = fcmp ogt <16 x double> %val1, %val2 +; CHECK: cost of 8 for instruction: %sel = select <16 x i1> %cmp, <16 x double> %val3, <16 x double> %val4 +} + diff --git a/llvm/test/Analysis/CostModel/SystemZ/ext-load.ll b/llvm/test/Analysis/CostModel/SystemZ/ext-load.ll new file mode 100644 index 0000000..d3d501a --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/ext-load.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; +; Test that an extension of a load does not get an additional cost in cases +; where the load performs the extension. 
+ +define void @sext() { + %li8 = load i8, i8* undef + sext i8 %li8 to i16 + sext i8 %li8 to i32 + sext i8 %li8 to i64 + + %li16 = load i16, i16* undef + sext i16 %li16 to i32 + sext i16 %li16 to i64 + + %li32 = load i32, i32* undef + sext i32 %li32 to i64 + + ret void + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li8 = load i8, i8* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %1 = sext i8 %li8 to i16 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %2 = sext i8 %li8 to i32 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %3 = sext i8 %li8 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16 = load i16, i16* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %4 = sext i16 %li16 to i32 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %5 = sext i16 %li16 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %6 = sext i32 %li32 to i64 +} + +define void @zext() { + %li8 = load i8, i8* undef + zext i8 %li8 to i16 + zext i8 %li8 to i32 + zext i8 %li8 to i64 + + %li16 = load i16, i16* undef + zext i16 %li16 to i32 + zext i16 %li16 to i64 + + %li32 = load i32, i32* undef + zext i32 %li32 to i64 + + ret void + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li8 = load i8, i8* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %1 = zext i8 %li8 to i16 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %2 = zext i8 %li8 to i32 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %3 = zext i8 %li8 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16 = load i16, i16* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %4 = zext i16 %li16 to i32 +; CHECK: Cost Model: Found an 
estimated cost of 0 for instruction: %5 = zext i16 %li16 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %6 = zext i32 %li32 to i64 +} diff --git a/llvm/test/Analysis/CostModel/SystemZ/fp-arith.ll b/llvm/test/Analysis/CostModel/SystemZ/fp-arith.ll new file mode 100644 index 0000000..08a7c29 --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/fp-arith.ll @@ -0,0 +1,119 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; +; Note: The scalarized vector instructions cost is not including any +; extracts, due to the undef operands +; +; Note: FRem is implemented with libcall, so not included here. + +define void @fadd() { + %res0 = fadd float undef, undef + %res1 = fadd double undef, undef + %res2 = fadd fp128 undef, undef + %res3 = fadd <2 x float> undef, undef + %res4 = fadd <2 x double> undef, undef + %res5 = fadd <4 x float> undef, undef + %res6 = fadd <4 x double> undef, undef + %res7 = fadd <8 x float> undef, undef + %res8 = fadd <8 x double> undef, undef + %res9 = fadd <16 x float> undef, undef + %res10 = fadd <16 x double> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fadd float undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fadd double undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fadd fp128 undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fadd <2 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fadd <2 x double> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fadd <4 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fadd <4 x double> undef, undef +; CHECK: Cost Model: Found an estimated 
cost of 16 for instruction: %res7 = fadd <8 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fadd <8 x double> undef, undef +; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fadd <16 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fadd <16 x double> undef, undef + + ret void; +} + +define void @fsub() { + %res0 = fsub float undef, undef + %res1 = fsub double undef, undef + %res2 = fsub fp128 undef, undef + %res3 = fsub <2 x float> undef, undef + %res4 = fsub <2 x double> undef, undef + %res5 = fsub <4 x float> undef, undef + %res6 = fsub <4 x double> undef, undef + %res7 = fsub <8 x float> undef, undef + %res8 = fsub <8 x double> undef, undef + %res9 = fsub <16 x float> undef, undef + %res10 = fsub <16 x double> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fsub float undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fsub double undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fsub fp128 undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fsub <2 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fsub <2 x double> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fsub <4 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fsub <4 x double> undef, undef +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fsub <8 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fsub <8 x double> undef, undef +; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fsub <16 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fsub <16 
x double> undef, undef + + ret void; +} + +define void @fmul() { + %res0 = fmul float undef, undef + %res1 = fmul double undef, undef + %res2 = fmul fp128 undef, undef + %res3 = fmul <2 x float> undef, undef + %res4 = fmul <2 x double> undef, undef + %res5 = fmul <4 x float> undef, undef + %res6 = fmul <4 x double> undef, undef + %res7 = fmul <8 x float> undef, undef + %res8 = fmul <8 x double> undef, undef + %res9 = fmul <16 x float> undef, undef + %res10 = fmul <16 x double> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fmul float undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fmul double undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fmul fp128 undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fmul <2 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fmul <2 x double> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fmul <4 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fmul <4 x double> undef, undef +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fmul <8 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fmul <8 x double> undef, undef +; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fmul <16 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fmul <16 x double> undef, undef + + ret void; +} + +define void @fdiv() { + %res0 = fdiv float undef, undef + %res1 = fdiv double undef, undef + %res2 = fdiv fp128 undef, undef + %res3 = fdiv <2 x float> undef, undef + %res4 = fdiv <2 x double> undef, undef + %res5 = fdiv <4 x float> undef, undef + %res6 = fdiv <4 x double> undef, undef + %res7 = fdiv <8 x float> undef, 
undef + %res8 = fdiv <8 x double> undef, undef + %res9 = fdiv <16 x float> undef, undef + %res10 = fdiv <16 x double> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fdiv float undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fdiv double undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fdiv fp128 undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fdiv <2 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fdiv <2 x double> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fdiv <4 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fdiv <4 x double> undef, undef +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fdiv <8 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fdiv <8 x double> undef, undef +; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fdiv <16 x float> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fdiv <16 x double> undef, undef + + ret void; +} + diff --git a/llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll b/llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll new file mode 100644 index 0000000..93f27dd --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll @@ -0,0 +1,541 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; +; Note: The scalarized vector instructions costs are not including any +; extracts, due to the undef operands. 
+ +define void @fpext() { + %v0 = fpext double undef to fp128 + %v1 = fpext float undef to fp128 + %v2 = fpext float undef to double + %v3 = fpext <2 x double> undef to <2 x fp128> + %v4 = fpext <2 x float> undef to <2 x fp128> + %v5 = fpext <2 x float> undef to <2 x double> + %v6 = fpext <4 x double> undef to <4 x fp128> + %v7 = fpext <4 x float> undef to <4 x fp128> + %v8 = fpext <4 x float> undef to <4 x double> + %v9 = fpext <8 x double> undef to <8 x fp128> + %v10 = fpext <8 x float> undef to <8 x fp128> + %v11 = fpext <8 x float> undef to <8 x double> + %v12 = fpext <16 x float> undef to <16 x double> + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v0 = fpext double undef to fp128 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v1 = fpext float undef to fp128 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v2 = fpext float undef to double +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v3 = fpext <2 x double> undef to <2 x fp128> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v4 = fpext <2 x float> undef to <2 x fp128> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v5 = fpext <2 x float> undef to <2 x double> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v6 = fpext <4 x double> undef to <4 x fp128> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v7 = fpext <4 x float> undef to <4 x fp128> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v8 = fpext <4 x float> undef to <4 x double> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v9 = fpext <8 x double> undef to <8 x fp128> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v10 = fpext <8 x float> undef to <8 x fp128> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v11 = fpext <8 x float> undef to <8 x double> +; CHECK: Cost Model: Found an estimated cost of 
32 for instruction: %v12 = fpext <16 x float> undef to <16 x double> + + ret void; +} + +define void @fptosi() { + %v0 = fptosi fp128 undef to i64 + %v1 = fptosi fp128 undef to i32 + %v2 = fptosi fp128 undef to i16 + %v3 = fptosi fp128 undef to i8 + %v4 = fptosi double undef to i64 + %v5 = fptosi double undef to i32 + %v6 = fptosi double undef to i16 + %v7 = fptosi double undef to i8 + %v8 = fptosi float undef to i64 + %v9 = fptosi float undef to i32 + %v10 = fptosi float undef to i16 + %v11 = fptosi float undef to i8 + %v12 = fptosi <2 x fp128> undef to <2 x i64> + %v13 = fptosi <2 x fp128> undef to <2 x i32> + %v14 = fptosi <2 x fp128> undef to <2 x i16> + %v15 = fptosi <2 x fp128> undef to <2 x i8> + %v16 = fptosi <2 x double> undef to <2 x i64> + %v17 = fptosi <2 x double> undef to <2 x i32> + %v18 = fptosi <2 x double> undef to <2 x i16> + %v19 = fptosi <2 x double> undef to <2 x i8> + %v20 = fptosi <2 x float> undef to <2 x i64> + %v21 = fptosi <2 x float> undef to <2 x i32> + %v22 = fptosi <2 x float> undef to <2 x i16> + %v23 = fptosi <2 x float> undef to <2 x i8> + %v24 = fptosi <4 x fp128> undef to <4 x i64> + %v25 = fptosi <4 x fp128> undef to <4 x i32> + %v26 = fptosi <4 x fp128> undef to <4 x i16> + %v27 = fptosi <4 x fp128> undef to <4 x i8> + %v28 = fptosi <4 x double> undef to <4 x i64> + %v29 = fptosi <4 x double> undef to <4 x i32> + %v30 = fptosi <4 x double> undef to <4 x i16> + %v31 = fptosi <4 x double> undef to <4 x i8> + %v32 = fptosi <4 x float> undef to <4 x i64> + %v33 = fptosi <4 x float> undef to <4 x i32> + %v34 = fptosi <4 x float> undef to <4 x i16> + %v35 = fptosi <4 x float> undef to <4 x i8> + %v36 = fptosi <8 x fp128> undef to <8 x i64> + %v37 = fptosi <8 x fp128> undef to <8 x i32> + %v38 = fptosi <8 x fp128> undef to <8 x i16> + %v39 = fptosi <8 x fp128> undef to <8 x i8> + %v40 = fptosi <8 x double> undef to <8 x i64> + %v41 = fptosi <8 x double> undef to <8 x i32> + %v42 = fptosi <8 x double> undef to <8 x i16> + %v43 = 
fptosi <8 x double> undef to <8 x i8> + %v44 = fptosi <8 x float> undef to <8 x i64> + %v45 = fptosi <8 x float> undef to <8 x i32> + %v46 = fptosi <8 x float> undef to <8 x i16> + %v47 = fptosi <8 x float> undef to <8 x i8> + %v48 = fptosi <16 x double> undef to <16 x i64> + %v49 = fptosi <16 x double> undef to <16 x i32> + %v50 = fptosi <16 x double> undef to <16 x i16> + %v51 = fptosi <16 x double> undef to <16 x i8> + %v52 = fptosi <16 x float> undef to <16 x i64> + %v53 = fptosi <16 x float> undef to <16 x i32> + %v54 = fptosi <16 x float> undef to <16 x i16> + %v55 = fptosi <16 x float> undef to <16 x i8> + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v0 = fptosi fp128 undef to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v1 = fptosi fp128 undef to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v2 = fptosi fp128 undef to i16 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v3 = fptosi fp128 undef to i8 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v4 = fptosi double undef to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v5 = fptosi double undef to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v6 = fptosi double undef to i16 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v7 = fptosi double undef to i8 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v8 = fptosi float undef to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v9 = fptosi float undef to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v10 = fptosi float undef to i16 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v11 = fptosi float undef to i8 +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v12 = fptosi <2 x fp128> undef to <2 x i64> +; CHECK: Cost Model: Found an estimated cost of 4 for 
instruction: %v13 = fptosi <2 x fp128> undef to <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v14 = fptosi <2 x fp128> undef to <2 x i16> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v15 = fptosi <2 x fp128> undef to <2 x i8> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v16 = fptosi <2 x double> undef to <2 x i64> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v17 = fptosi <2 x double> undef to <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v18 = fptosi <2 x double> undef to <2 x i16> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v19 = fptosi <2 x double> undef to <2 x i8> +; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v20 = fptosi <2 x float> undef to <2 x i64> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v22 = fptosi <2 x float> undef to <2 x i16> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = fptosi <2 x float> undef to <2 x i8> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v24 = fptosi <4 x fp128> undef to <4 x i64> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v25 = fptosi <4 x fp128> undef to <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v26 = fptosi <4 x fp128> undef to <4 x i16> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v27 = fptosi <4 x fp128> undef to <4 x i8> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v28 = fptosi <4 x double> undef to <4 x i64> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v29 = fptosi <4 x double> undef to <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v30 = fptosi <4 x double> undef to <4 x i16> +; CHECK: Cost 
Model: Found an estimated cost of 12 for instruction: %v31 = fptosi <4 x double> undef to <4 x i8> +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v32 = fptosi <4 x float> undef to <4 x i64> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v34 = fptosi <4 x float> undef to <4 x i16> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v35 = fptosi <4 x float> undef to <4 x i8> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v36 = fptosi <8 x fp128> undef to <8 x i64> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v37 = fptosi <8 x fp128> undef to <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v38 = fptosi <8 x fp128> undef to <8 x i16> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v39 = fptosi <8 x fp128> undef to <8 x i8> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v40 = fptosi <8 x double> undef to <8 x i64> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v41 = fptosi <8 x double> undef to <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v42 = fptosi <8 x double> undef to <8 x i16> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v43 = fptosi <8 x double> undef to <8 x i8> +; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %v44 = fptosi <8 x float> undef to <8 x i64> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v46 = fptosi <8 x float> undef to <8 x i16> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v47 = fptosi <8 x float> undef to <8 x i8> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = 
fptosi <16 x double> undef to <16 x i64> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v49 = fptosi <16 x double> undef to <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v50 = fptosi <16 x double> undef to <16 x i16> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = fptosi <16 x double> undef to <16 x i8> +; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %v52 = fptosi <16 x float> undef to <16 x i64> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v54 = fptosi <16 x float> undef to <16 x i16> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v55 = fptosi <16 x float> undef to <16 x i8> + + ret void; +} + + +define void @fptoui() { + %v0 = fptoui fp128 undef to i64 + %v1 = fptoui fp128 undef to i32 + %v2 = fptoui fp128 undef to i16 + %v3 = fptoui fp128 undef to i8 + %v4 = fptoui double undef to i64 + %v5 = fptoui double undef to i32 + %v6 = fptoui double undef to i16 + %v7 = fptoui double undef to i8 + %v8 = fptoui float undef to i64 + %v9 = fptoui float undef to i32 + %v10 = fptoui float undef to i16 + %v11 = fptoui float undef to i8 + %v12 = fptoui <2 x fp128> undef to <2 x i64> + %v13 = fptoui <2 x fp128> undef to <2 x i32> + %v14 = fptoui <2 x fp128> undef to <2 x i16> + %v15 = fptoui <2 x fp128> undef to <2 x i8> + %v16 = fptoui <2 x double> undef to <2 x i64> + %v17 = fptoui <2 x double> undef to <2 x i32> + %v18 = fptoui <2 x double> undef to <2 x i16> + %v19 = fptoui <2 x double> undef to <2 x i8> + %v20 = fptoui <2 x float> undef to <2 x i64> + %v21 = fptoui <2 x float> undef to <2 x i32> + %v22 = fptoui <2 x float> undef to <2 x i16> + %v23 = fptoui <2 x float> undef to <2 x i8> + %v24 = fptoui <4 x fp128> undef to <4 x i64> + %v25 = fptoui <4 x fp128> undef to <4 x i32> + %v26 = fptoui <4 x fp128> 
undef to <4 x i16> + %v27 = fptoui <4 x fp128> undef to <4 x i8> + %v28 = fptoui <4 x double> undef to <4 x i64> + %v29 = fptoui <4 x double> undef to <4 x i32> + %v30 = fptoui <4 x double> undef to <4 x i16> + %v31 = fptoui <4 x double> undef to <4 x i8> + %v32 = fptoui <4 x float> undef to <4 x i64> + %v33 = fptoui <4 x float> undef to <4 x i32> + %v34 = fptoui <4 x float> undef to <4 x i16> + %v35 = fptoui <4 x float> undef to <4 x i8> + %v36 = fptoui <8 x fp128> undef to <8 x i64> + %v37 = fptoui <8 x fp128> undef to <8 x i32> + %v38 = fptoui <8 x fp128> undef to <8 x i16> + %v39 = fptoui <8 x fp128> undef to <8 x i8> + %v40 = fptoui <8 x double> undef to <8 x i64> + %v41 = fptoui <8 x double> undef to <8 x i32> + %v42 = fptoui <8 x double> undef to <8 x i16> + %v43 = fptoui <8 x double> undef to <8 x i8> + %v44 = fptoui <8 x float> undef to <8 x i64> + %v45 = fptoui <8 x float> undef to <8 x i32> + %v46 = fptoui <8 x float> undef to <8 x i16> + %v47 = fptoui <8 x float> undef to <8 x i8> + %v48 = fptoui <16 x double> undef to <16 x i64> + %v49 = fptoui <16 x double> undef to <16 x i32> + %v50 = fptoui <16 x double> undef to <16 x i16> + %v51 = fptoui <16 x double> undef to <16 x i8> + %v52 = fptoui <16 x float> undef to <16 x i64> + %v53 = fptoui <16 x float> undef to <16 x i32> + %v54 = fptoui <16 x float> undef to <16 x i16> + %v55 = fptoui <16 x float> undef to <16 x i8> + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v0 = fptoui fp128 undef to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v1 = fptoui fp128 undef to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v2 = fptoui fp128 undef to i16 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v3 = fptoui fp128 undef to i8 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v4 = fptoui double undef to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v5 = fptoui double undef 
to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v6 = fptoui double undef to i16 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v7 = fptoui double undef to i8 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v8 = fptoui float undef to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v9 = fptoui float undef to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v10 = fptoui float undef to i16 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v11 = fptoui float undef to i8 +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v12 = fptoui <2 x fp128> undef to <2 x i64> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v13 = fptoui <2 x fp128> undef to <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v14 = fptoui <2 x fp128> undef to <2 x i16> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v15 = fptoui <2 x fp128> undef to <2 x i8> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v16 = fptoui <2 x double> undef to <2 x i64> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v17 = fptoui <2 x double> undef to <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v18 = fptoui <2 x double> undef to <2 x i16> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v19 = fptoui <2 x double> undef to <2 x i8> +; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v20 = fptoui <2 x float> undef to <2 x i64> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v22 = fptoui <2 x float> undef to <2 x i16> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = fptoui <2 x float> undef to <2 x i8> +; CHECK: Cost Model: Found an 
estimated cost of 6 for instruction: %v24 = fptoui <4 x fp128> undef to <4 x i64> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v25 = fptoui <4 x fp128> undef to <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v26 = fptoui <4 x fp128> undef to <4 x i16> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v27 = fptoui <4 x fp128> undef to <4 x i8> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v28 = fptoui <4 x double> undef to <4 x i64> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v29 = fptoui <4 x double> undef to <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v30 = fptoui <4 x double> undef to <4 x i16> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v31 = fptoui <4 x double> undef to <4 x i8> +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v32 = fptoui <4 x float> undef to <4 x i64> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v34 = fptoui <4 x float> undef to <4 x i16> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v35 = fptoui <4 x float> undef to <4 x i8> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v36 = fptoui <8 x fp128> undef to <8 x i64> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v37 = fptoui <8 x fp128> undef to <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v38 = fptoui <8 x fp128> undef to <8 x i16> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v39 = fptoui <8 x fp128> undef to <8 x i8> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v40 = fptoui <8 x double> undef to <8 x i64> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v41 = fptoui <8 x double> 
undef to <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v42 = fptoui <8 x double> undef to <8 x i16> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v43 = fptoui <8 x double> undef to <8 x i8> +; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %v44 = fptoui <8 x float> undef to <8 x i64> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v46 = fptoui <8 x float> undef to <8 x i16> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v47 = fptoui <8 x float> undef to <8 x i8> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = fptoui <16 x double> undef to <16 x i64> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v49 = fptoui <16 x double> undef to <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v50 = fptoui <16 x double> undef to <16 x i16> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = fptoui <16 x double> undef to <16 x i8> +; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %v52 = fptoui <16 x float> undef to <16 x i64> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v54 = fptoui <16 x float> undef to <16 x i16> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v55 = fptoui <16 x float> undef to <16 x i8> + + ret void; +} + +define void @fptrunc() { + %v0 = fptrunc fp128 undef to double + %v1 = fptrunc fp128 undef to float + %v2 = fptrunc double undef to float + %v3 = fptrunc <2 x fp128> undef to <2 x double> + %v4 = fptrunc <2 x fp128> undef to <2 x float> + %v5 = fptrunc <2 x double> undef to <2 x float> + %v6 = fptrunc <4 x fp128> undef to <4 x double> + %v7 = 
fptrunc <4 x fp128> undef to <4 x float> + %v8 = fptrunc <4 x double> undef to <4 x float> + %v9 = fptrunc <8 x fp128> undef to <8 x double> + %v10 = fptrunc <8 x fp128> undef to <8 x float> + %v11 = fptrunc <8 x double> undef to <8 x float> + %v12 = fptrunc <16 x double> undef to <16 x float> + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v0 = fptrunc fp128 undef to double +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v1 = fptrunc fp128 undef to float +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v2 = fptrunc double undef to float +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v3 = fptrunc <2 x fp128> undef to <2 x double> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v4 = fptrunc <2 x fp128> undef to <2 x float> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v5 = fptrunc <2 x double> undef to <2 x float> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v6 = fptrunc <4 x fp128> undef to <4 x double> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v7 = fptrunc <4 x fp128> undef to <4 x float> +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v8 = fptrunc <4 x double> undef to <4 x float> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v9 = fptrunc <8 x fp128> undef to <8 x double> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v10 = fptrunc <8 x fp128> undef to <8 x float> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v11 = fptrunc <8 x double> undef to <8 x float> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v12 = fptrunc <16 x double> undef to <16 x float> + + ret void; +} + +define void @sitofp() { + %v0 = sitofp i64 undef to fp128 + %v1 = sitofp i64 undef to double + %v2 = sitofp i64 undef to float + %v3 = sitofp i32 undef to fp128 + %v4 = sitofp i32 undef to double + %v5 = 
sitofp i32 undef to float + %v6 = sitofp i16 undef to fp128 + %v7 = sitofp i16 undef to double + %v8 = sitofp i16 undef to float + %v9 = sitofp i8 undef to fp128 + %v10 = sitofp i8 undef to double + %v11 = sitofp i8 undef to float + %v12 = sitofp <2 x i64> undef to <2 x fp128> + %v13 = sitofp <2 x i64> undef to <2 x double> + %v14 = sitofp <2 x i64> undef to <2 x float> + %v15 = sitofp <2 x i32> undef to <2 x fp128> + %v16 = sitofp <2 x i32> undef to <2 x double> + %v17 = sitofp <2 x i32> undef to <2 x float> + %v18 = sitofp <2 x i16> undef to <2 x fp128> + %v19 = sitofp <2 x i16> undef to <2 x double> + %v20 = sitofp <2 x i16> undef to <2 x float> + %v21 = sitofp <2 x i8> undef to <2 x fp128> + %v22 = sitofp <2 x i8> undef to <2 x double> + %v23 = sitofp <2 x i8> undef to <2 x float> + %v24 = sitofp <4 x i64> undef to <4 x fp128> + %v25 = sitofp <4 x i64> undef to <4 x double> + %v26 = sitofp <4 x i64> undef to <4 x float> + %v27 = sitofp <4 x i32> undef to <4 x fp128> + %v28 = sitofp <4 x i32> undef to <4 x double> + %v29 = sitofp <4 x i32> undef to <4 x float> + %v30 = sitofp <4 x i16> undef to <4 x fp128> + %v31 = sitofp <4 x i16> undef to <4 x double> + %v32 = sitofp <4 x i16> undef to <4 x float> + %v33 = sitofp <4 x i8> undef to <4 x fp128> + %v34 = sitofp <4 x i8> undef to <4 x double> + %v35 = sitofp <4 x i8> undef to <4 x float> + %v36 = sitofp <8 x i64> undef to <8 x fp128> + %v37 = sitofp <8 x i64> undef to <8 x double> + %v38 = sitofp <8 x i64> undef to <8 x float> + %v39 = sitofp <8 x i32> undef to <8 x fp128> + %v40 = sitofp <8 x i32> undef to <8 x double> + %v41 = sitofp <8 x i32> undef to <8 x float> + %v42 = sitofp <8 x i16> undef to <8 x fp128> + %v43 = sitofp <8 x i16> undef to <8 x double> + %v44 = sitofp <8 x i16> undef to <8 x float> + %v45 = sitofp <8 x i8> undef to <8 x fp128> + %v46 = sitofp <8 x i8> undef to <8 x double> + %v47 = sitofp <8 x i8> undef to <8 x float> + %v48 = sitofp <16 x i64> undef to <16 x double> + %v49 = sitofp <16 x 
i64> undef to <16 x float> + %v50 = sitofp <16 x i32> undef to <16 x double> + %v51 = sitofp <16 x i32> undef to <16 x float> + %v52 = sitofp <16 x i16> undef to <16 x double> + %v53 = sitofp <16 x i16> undef to <16 x float> + %v54 = sitofp <16 x i8> undef to <16 x double> + %v55 = sitofp <16 x i8> undef to <16 x float> + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sitofp i64 undef to fp128 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v1 = sitofp i64 undef to double +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v2 = sitofp i64 undef to float +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v3 = sitofp i32 undef to fp128 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v4 = sitofp i32 undef to double +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v5 = sitofp i32 undef to float +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v6 = sitofp i16 undef to fp128 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v7 = sitofp i16 undef to double +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v8 = sitofp i16 undef to float +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v9 = sitofp i8 undef to fp128 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v10 = sitofp i8 undef to double +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v11 = sitofp i8 undef to float +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v12 = sitofp <2 x i64> undef to <2 x fp128> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v13 = sitofp <2 x i64> undef to <2 x double> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v14 = sitofp <2 x i64> undef to <2 x float> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v15 = sitofp <2 x i32> undef to <2 x fp128> +; CHECK: Cost 
Model: Found an estimated cost of 6 for instruction: %v16 = sitofp <2 x i32> undef to <2 x double> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v18 = sitofp <2 x i16> undef to <2 x fp128> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v19 = sitofp <2 x i16> undef to <2 x double> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v20 = sitofp <2 x i16> undef to <2 x float> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v21 = sitofp <2 x i8> undef to <2 x fp128> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v22 = sitofp <2 x i8> undef to <2 x double> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v23 = sitofp <2 x i8> undef to <2 x float> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v24 = sitofp <4 x i64> undef to <4 x fp128> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v25 = sitofp <4 x i64> undef to <4 x double> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v26 = sitofp <4 x i64> undef to <4 x float> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v27 = sitofp <4 x i32> undef to <4 x fp128> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v28 = sitofp <4 x i32> undef to <4 x double> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v30 = sitofp <4 x i16> undef to <4 x fp128> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v31 = sitofp <4 x i16> undef to <4 x double> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v32 = sitofp <4 x i16> undef to <4 x float> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v33 = sitofp 
<4 x i8> undef to <4 x fp128> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v34 = sitofp <4 x i8> undef to <4 x double> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v35 = sitofp <4 x i8> undef to <4 x float> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v36 = sitofp <8 x i64> undef to <8 x fp128> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v37 = sitofp <8 x i64> undef to <8 x double> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v38 = sitofp <8 x i64> undef to <8 x float> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v39 = sitofp <8 x i32> undef to <8 x fp128> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v40 = sitofp <8 x i32> undef to <8 x double> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v42 = sitofp <8 x i16> undef to <8 x fp128> +; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %v43 = sitofp <8 x i16> undef to <8 x double> +; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %v44 = sitofp <8 x i16> undef to <8 x float> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v45 = sitofp <8 x i8> undef to <8 x fp128> +; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %v46 = sitofp <8 x i8> undef to <8 x double> +; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %v47 = sitofp <8 x i8> undef to <8 x float> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = sitofp <16 x i64> undef to <16 x double> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v49 = sitofp <16 x i64> undef to <16 x float> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v50 = sitofp <16 x i32> undef to <16 x double> +; CHECK: Cost Model: 
Found an estimated cost of 48 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float> +; CHECK: Cost Model: Found an estimated cost of 64 for instruction: %v52 = sitofp <16 x i16> undef to <16 x double> +; CHECK: Cost Model: Found an estimated cost of 64 for instruction: %v53 = sitofp <16 x i16> undef to <16 x float> +; CHECK: Cost Model: Found an estimated cost of 64 for instruction: %v54 = sitofp <16 x i8> undef to <16 x double> +; CHECK: Cost Model: Found an estimated cost of 64 for instruction: %v55 = sitofp <16 x i8> undef to <16 x float> + + ret void; +} + +define void @uitofp() { + %v0 = uitofp i64 undef to fp128 + %v1 = uitofp i64 undef to double + %v2 = uitofp i64 undef to float + %v3 = uitofp i32 undef to fp128 + %v4 = uitofp i32 undef to double + %v5 = uitofp i32 undef to float + %v6 = uitofp i16 undef to fp128 + %v7 = uitofp i16 undef to double + %v8 = uitofp i16 undef to float + %v9 = uitofp i8 undef to fp128 + %v10 = uitofp i8 undef to double + %v11 = uitofp i8 undef to float + %v12 = uitofp <2 x i64> undef to <2 x fp128> + %v13 = uitofp <2 x i64> undef to <2 x double> + %v14 = uitofp <2 x i64> undef to <2 x float> + %v15 = uitofp <2 x i32> undef to <2 x fp128> + %v16 = uitofp <2 x i32> undef to <2 x double> + %v17 = uitofp <2 x i32> undef to <2 x float> + %v18 = uitofp <2 x i16> undef to <2 x fp128> + %v19 = uitofp <2 x i16> undef to <2 x double> + %v20 = uitofp <2 x i16> undef to <2 x float> + %v21 = uitofp <2 x i8> undef to <2 x fp128> + %v22 = uitofp <2 x i8> undef to <2 x double> + %v23 = uitofp <2 x i8> undef to <2 x float> + %v24 = uitofp <4 x i64> undef to <4 x fp128> + %v25 = uitofp <4 x i64> undef to <4 x double> + %v26 = uitofp <4 x i64> undef to <4 x float> + %v27 = uitofp <4 x i32> undef to <4 x fp128> + %v28 = uitofp <4 x i32> undef to <4 x double> + %v29 = uitofp <4 x i32> undef to <4 x float> + %v30 = uitofp <4 x i16> undef to <4 x fp128> + %v31 = uitofp <4 x i16> undef to <4 x double> + %v32 = uitofp <4 x i16> undef to <4 x 
float> + %v33 = uitofp <4 x i8> undef to <4 x fp128> + %v34 = uitofp <4 x i8> undef to <4 x double> + %v35 = uitofp <4 x i8> undef to <4 x float> + %v36 = uitofp <8 x i64> undef to <8 x fp128> + %v37 = uitofp <8 x i64> undef to <8 x double> + %v38 = uitofp <8 x i64> undef to <8 x float> + %v39 = uitofp <8 x i32> undef to <8 x fp128> + %v40 = uitofp <8 x i32> undef to <8 x double> + %v41 = uitofp <8 x i32> undef to <8 x float> + %v42 = uitofp <8 x i16> undef to <8 x fp128> + %v43 = uitofp <8 x i16> undef to <8 x double> + %v44 = uitofp <8 x i16> undef to <8 x float> + %v45 = uitofp <8 x i8> undef to <8 x fp128> + %v46 = uitofp <8 x i8> undef to <8 x double> + %v47 = uitofp <8 x i8> undef to <8 x float> + %v48 = uitofp <16 x i64> undef to <16 x double> + %v49 = uitofp <16 x i64> undef to <16 x float> + %v50 = uitofp <16 x i32> undef to <16 x double> + %v51 = uitofp <16 x i32> undef to <16 x float> + %v52 = uitofp <16 x i16> undef to <16 x double> + %v53 = uitofp <16 x i16> undef to <16 x float> + %v54 = uitofp <16 x i8> undef to <16 x double> + %v55 = uitofp <16 x i8> undef to <16 x float> + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v0 = uitofp i64 undef to fp128 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v1 = uitofp i64 undef to double +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v2 = uitofp i64 undef to float +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v3 = uitofp i32 undef to fp128 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v4 = uitofp i32 undef to double +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v5 = uitofp i32 undef to float +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v6 = uitofp i16 undef to fp128 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v7 = uitofp i16 undef to double +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v8 = uitofp i16 
undef to float +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v9 = uitofp i8 undef to fp128 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v10 = uitofp i8 undef to double +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v11 = uitofp i8 undef to float +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v12 = uitofp <2 x i64> undef to <2 x fp128> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v13 = uitofp <2 x i64> undef to <2 x double> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v14 = uitofp <2 x i64> undef to <2 x float> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v15 = uitofp <2 x i32> undef to <2 x fp128> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v16 = uitofp <2 x i32> undef to <2 x double> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v18 = uitofp <2 x i16> undef to <2 x fp128> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v19 = uitofp <2 x i16> undef to <2 x double> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v20 = uitofp <2 x i16> undef to <2 x float> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v21 = uitofp <2 x i8> undef to <2 x fp128> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v22 = uitofp <2 x i8> undef to <2 x double> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v23 = uitofp <2 x i8> undef to <2 x float> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v24 = uitofp <4 x i64> undef to <4 x fp128> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v25 = uitofp <4 x i64> undef to <4 x double> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v26 = uitofp <4 x i64> undef 
to <4 x float> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v27 = uitofp <4 x i32> undef to <4 x fp128> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v28 = uitofp <4 x i32> undef to <4 x double> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v30 = uitofp <4 x i16> undef to <4 x fp128> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v31 = uitofp <4 x i16> undef to <4 x double> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v32 = uitofp <4 x i16> undef to <4 x float> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v33 = uitofp <4 x i8> undef to <4 x fp128> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v34 = uitofp <4 x i8> undef to <4 x double> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v35 = uitofp <4 x i8> undef to <4 x float> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v36 = uitofp <8 x i64> undef to <8 x fp128> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v37 = uitofp <8 x i64> undef to <8 x double> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v38 = uitofp <8 x i64> undef to <8 x float> +; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %v39 = uitofp <8 x i32> undef to <8 x fp128> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v40 = uitofp <8 x i32> undef to <8 x double> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v42 = uitofp <8 x i16> undef to <8 x fp128> +; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %v43 = uitofp <8 x i16> undef to <8 x double> +; CHECK: Cost Model: Found an estimated cost 
of 32 for instruction: %v44 = uitofp <8 x i16> undef to <8 x float> +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v45 = uitofp <8 x i8> undef to <8 x fp128> +; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %v46 = uitofp <8 x i8> undef to <8 x double> +; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %v47 = uitofp <8 x i8> undef to <8 x float> +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = uitofp <16 x i64> undef to <16 x double> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v49 = uitofp <16 x i64> undef to <16 x float> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v50 = uitofp <16 x i32> undef to <16 x double> +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float> +; CHECK: Cost Model: Found an estimated cost of 64 for instruction: %v52 = uitofp <16 x i16> undef to <16 x double> +; CHECK: Cost Model: Found an estimated cost of 64 for instruction: %v53 = uitofp <16 x i16> undef to <16 x float> +; CHECK: Cost Model: Found an estimated cost of 64 for instruction: %v54 = uitofp <16 x i8> undef to <16 x double> +; CHECK: Cost Model: Found an estimated cost of 64 for instruction: %v55 = uitofp <16 x i8> undef to <16 x float> + + ret void; +} diff --git a/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll b/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll new file mode 100644 index 0000000..518c9b0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll @@ -0,0 +1,326 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; +; Note: The scalarized vector instructions costs are not including any +; extracts, due to the undef operands. 
+ +define void @add() { + %res0 = add i8 undef, undef + %res1 = add i16 undef, undef + %res2 = add i32 undef, undef + %res3 = add i64 undef, undef + %res4 = add <2 x i8> undef, undef + %res5 = add <2 x i16> undef, undef + %res6 = add <2 x i32> undef, undef + %res7 = add <2 x i64> undef, undef + %res8 = add <4 x i8> undef, undef + %res9 = add <4 x i16> undef, undef + %res10 = add <4 x i32> undef, undef + %res11 = add <4 x i64> undef, undef + %res12 = add <8 x i8> undef, undef + %res13 = add <8 x i16> undef, undef + %res14 = add <8 x i32> undef, undef + %res15 = add <8 x i64> undef, undef + %res16 = add <16 x i8> undef, undef + %res17 = add <16 x i16> undef, undef + %res18 = add <16 x i32> undef, undef + %res19 = add <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = add i8 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = add i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = add i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = add i64 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = add <2 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = add <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = add <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res7 = add <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res8 = add <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res9 = add <4 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res10 = add <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res11 = add <4 x i64> undef, undef +; CHECK: Cost Model: Found an 
estimated cost of 1 for instruction: %res12 = add <8 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res13 = add <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res14 = add <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res15 = add <8 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res16 = add <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res17 = add <16 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res18 = add <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res19 = add <16 x i64> undef, undef + + ret void; +} + +define void @sub() { + %res0 = sub i8 undef, undef + %res1 = sub i16 undef, undef + %res2 = sub i32 undef, undef + %res3 = sub i64 undef, undef + %res4 = sub <2 x i8> undef, undef + %res5 = sub <2 x i16> undef, undef + %res6 = sub <2 x i32> undef, undef + %res7 = sub <2 x i64> undef, undef + %res8 = sub <4 x i8> undef, undef + %res9 = sub <4 x i16> undef, undef + %res10 = sub <4 x i32> undef, undef + %res11 = sub <4 x i64> undef, undef + %res12 = sub <8 x i8> undef, undef + %res13 = sub <8 x i16> undef, undef + %res14 = sub <8 x i32> undef, undef + %res15 = sub <8 x i64> undef, undef + %res16 = sub <16 x i8> undef, undef + %res17 = sub <16 x i16> undef, undef + %res18 = sub <16 x i32> undef, undef + %res19 = sub <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = sub i8 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = sub i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = sub i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = sub i64 undef, undef +; CHECK: Cost Model: Found an estimated 
cost of 1 for instruction: %res4 = sub <2 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = sub <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = sub <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res7 = sub <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res8 = sub <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res9 = sub <4 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res10 = sub <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res11 = sub <4 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res12 = sub <8 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res13 = sub <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res14 = sub <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res15 = sub <8 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res16 = sub <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res17 = sub <16 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res18 = sub <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res19 = sub <16 x i64> undef, undef + + ret void; +} + +define void @mul() { + %res0 = mul i8 undef, undef + %res1 = mul i16 undef, undef + %res2 = mul i32 undef, undef + %res3 = mul i64 undef, undef + %res4 = mul <2 x i8> undef, undef + %res5 = mul <2 x i16> undef, undef + %res6 = mul <2 x i32> undef, undef + %res7 = mul <2 x i64> undef, undef + %res8 = mul <4 x i8> undef, undef + %res9 = mul <4 x i16> undef, 
undef + %res10 = mul <4 x i32> undef, undef + %res11 = mul <4 x i64> undef, undef + %res12 = mul <8 x i8> undef, undef + %res13 = mul <8 x i16> undef, undef + %res14 = mul <8 x i32> undef, undef + %res15 = mul <8 x i64> undef, undef + %res16 = mul <16 x i8> undef, undef + %res17 = mul <16 x i16> undef, undef + %res18 = mul <16 x i32> undef, undef + %res19 = mul <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = mul i8 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = mul i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = mul i64 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <2 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = mul <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = mul <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %res7 = mul <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res9 = mul <4 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res10 = mul <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %res11 = mul <4 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res12 = mul <8 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res13 = mul <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res14 = mul <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %res15 = mul <8 x 
i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res17 = mul <16 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res18 = mul <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %res19 = mul <16 x i64> undef, undef + + ret void; +} + +define void @sdiv() { + %res0 = sdiv i8 undef, undef + %res1 = sdiv i16 undef, undef + %res2 = sdiv i32 undef, undef + %res3 = sdiv i64 undef, undef + %res4 = sdiv <2 x i8> undef, undef + %res5 = sdiv <2 x i16> undef, undef + %res6 = sdiv <2 x i32> undef, undef + %res7 = sdiv <2 x i64> undef, undef + %res8 = sdiv <4 x i8> undef, undef + %res9 = sdiv <4 x i16> undef, undef + %res10 = sdiv <4 x i32> undef, undef + %res11 = sdiv <4 x i64> undef, undef + %res12 = sdiv <8 x i8> undef, undef + %res13 = sdiv <8 x i16> undef, undef + %res14 = sdiv <8 x i32> undef, undef + %res15 = sdiv <8 x i64> undef, undef + %res16 = sdiv <16 x i8> undef, undef + %res17 = sdiv <16 x i16> undef, undef + %res18 = sdiv <16 x i32> undef, undef + %res19 = sdiv <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res0 = sdiv i8 undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res1 = sdiv i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res2 = sdiv i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = sdiv i64 undef, undef +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %res4 = sdiv <2 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %res5 = sdiv <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %res6 = sdiv <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: 
%res7 = sdiv <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %res8 = sdiv <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %res9 = sdiv <4 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %res10 = sdiv <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %res11 = sdiv <4 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %res12 = sdiv <8 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %res13 = sdiv <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %res14 = sdiv <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %res15 = sdiv <8 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 80 for instruction: %res16 = sdiv <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 80 for instruction: %res17 = sdiv <16 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %res18 = sdiv <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %res19 = sdiv <16 x i64> undef, undef + + ret void; +} + +define void @srem() { + %res0 = srem i8 undef, undef + %res1 = srem i16 undef, undef + %res2 = srem i32 undef, undef + %res3 = srem i64 undef, undef + %res4 = srem <2 x i8> undef, undef + %res5 = srem <2 x i16> undef, undef + %res6 = srem <2 x i32> undef, undef + %res7 = srem <2 x i64> undef, undef + %res8 = srem <4 x i8> undef, undef + %res9 = srem <4 x i16> undef, undef + %res10 = srem <4 x i32> undef, undef + %res11 = srem <4 x i64> undef, undef + %res12 = srem <8 x i8> undef, undef + %res13 = srem <8 x i16> undef, undef + %res14 = srem <8 x i32> undef, undef + %res15 = srem <8 x i64> undef, undef + %res16 = srem <16 x i8> undef, undef + %res17 = srem <16 x i16> 
undef, undef + %res18 = srem <16 x i32> undef, undef + %res19 = srem <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res0 = srem i8 undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res1 = srem i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res2 = srem i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = srem i64 undef, undef +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %res4 = srem <2 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %res5 = srem <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %res6 = srem <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %res7 = srem <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %res8 = srem <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %res9 = srem <4 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %res10 = srem <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %res11 = srem <4 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %res12 = srem <8 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %res13 = srem <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %res14 = srem <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %res15 = srem <8 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 80 for instruction: %res16 = srem <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 80 for instruction: %res17 = srem <16 x i16> undef, undef +; CHECK: Cost Model: Found an estimated 
cost of 48 for instruction: %res18 = srem <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %res19 = srem <16 x i64> undef, undef + + ret void; +} + +define void @udiv() { + %res0 = udiv i8 undef, undef + %res1 = udiv i16 undef, undef + %res2 = udiv i32 undef, undef + %res3 = udiv i64 undef, undef + %res4 = udiv <2 x i8> undef, undef + %res5 = udiv <2 x i16> undef, undef + %res6 = udiv <2 x i32> undef, undef + %res7 = udiv <2 x i64> undef, undef + %res8 = udiv <4 x i8> undef, undef + %res9 = udiv <4 x i16> undef, undef + %res10 = udiv <4 x i32> undef, undef + %res11 = udiv <4 x i64> undef, undef + %res12 = udiv <8 x i8> undef, undef + %res13 = udiv <8 x i16> undef, undef + %res14 = udiv <8 x i32> undef, undef + %res15 = udiv <8 x i64> undef, undef + %res16 = udiv <16 x i8> undef, undef + %res17 = udiv <16 x i16> undef, undef + %res18 = udiv <16 x i32> undef, undef + %res19 = udiv <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res0 = udiv i8 undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res1 = udiv i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res2 = udiv i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res3 = udiv i64 undef, undef +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %res4 = udiv <2 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %res5 = udiv <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %res6 = udiv <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %res7 = udiv <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %res8 = udiv <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %res9 = udiv <4 x i16> undef, undef +; CHECK: Cost 
Model: Found an estimated cost of 12 for instruction: %res10 = udiv <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %res11 = udiv <4 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %res12 = udiv <8 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %res13 = udiv <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %res14 = udiv <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %res15 = udiv <8 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 80 for instruction: %res16 = udiv <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 80 for instruction: %res17 = udiv <16 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %res18 = udiv <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %res19 = udiv <16 x i64> undef, undef + + ret void; +} + +define void @urem() { + %res0 = urem i8 undef, undef + %res1 = urem i16 undef, undef + %res2 = urem i32 undef, undef + %res3 = urem i64 undef, undef + %res4 = urem <2 x i8> undef, undef + %res5 = urem <2 x i16> undef, undef + %res6 = urem <2 x i32> undef, undef + %res7 = urem <2 x i64> undef, undef + %res8 = urem <4 x i8> undef, undef + %res9 = urem <4 x i16> undef, undef + %res10 = urem <4 x i32> undef, undef + %res11 = urem <4 x i64> undef, undef + %res12 = urem <8 x i8> undef, undef + %res13 = urem <8 x i16> undef, undef + %res14 = urem <8 x i32> undef, undef + %res15 = urem <8 x i64> undef, undef + %res16 = urem <16 x i8> undef, undef + %res17 = urem <16 x i16> undef, undef + %res18 = urem <16 x i32> undef, undef + %res19 = urem <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res0 = urem i8 undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for 
instruction: %res1 = urem i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res2 = urem i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res3 = urem i64 undef, undef +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %res4 = urem <2 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %res5 = urem <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %res6 = urem <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %res7 = urem <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %res8 = urem <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %res9 = urem <4 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %res10 = urem <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %res11 = urem <4 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %res12 = urem <8 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %res13 = urem <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %res14 = urem <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %res15 = urem <8 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 80 for instruction: %res16 = urem <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 80 for instruction: %res17 = urem <16 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %res18 = urem <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %res19 = urem <16 x i64> undef, undef + + ret void; +} diff --git 
a/llvm/test/Analysis/CostModel/SystemZ/int-cast.ll b/llvm/test/Analysis/CostModel/SystemZ/int-cast.ll new file mode 100644 index 0000000..7764c6f --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/int-cast.ll @@ -0,0 +1,199 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s + +define void @sext() { + %v0 = sext i8 undef to i16 + %v1 = sext i8 undef to i32 + %v2 = sext i8 undef to i64 + %v3 = sext i16 undef to i32 + %v4 = sext i16 undef to i64 + %v5 = sext i32 undef to i64 + %v6 = sext <2 x i8> undef to <2 x i16> + %v7 = sext <2 x i8> undef to <2 x i32> + %v8 = sext <2 x i8> undef to <2 x i64> + %v9 = sext <2 x i16> undef to <2 x i32> + %v10 = sext <2 x i16> undef to <2 x i64> + %v11 = sext <2 x i32> undef to <2 x i64> + %v12 = sext <4 x i8> undef to <4 x i16> + %v13 = sext <4 x i8> undef to <4 x i32> + %v14 = sext <4 x i8> undef to <4 x i64> + %v15 = sext <4 x i16> undef to <4 x i32> + %v16 = sext <4 x i16> undef to <4 x i64> + %v17 = sext <4 x i32> undef to <4 x i64> + %v18 = sext <8 x i8> undef to <8 x i16> + %v19 = sext <8 x i8> undef to <8 x i32> + %v20 = sext <8 x i8> undef to <8 x i64> + %v21 = sext <8 x i16> undef to <8 x i32> + %v22 = sext <8 x i16> undef to <8 x i64> + %v23 = sext <8 x i32> undef to <8 x i64> + %v24 = sext <16 x i8> undef to <16 x i16> + %v25 = sext <16 x i8> undef to <16 x i32> + %v26 = sext <16 x i8> undef to <16 x i64> + %v27 = sext <16 x i16> undef to <16 x i32> + %v28 = sext <16 x i16> undef to <16 x i64> + %v29 = sext <16 x i32> undef to <16 x i64> + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v0 = sext i8 undef to i16 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v1 = sext i8 undef to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v2 = sext i8 undef to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v3 = sext i16 undef to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for 
instruction: %v4 = sext i16 undef to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v5 = sext i32 undef to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sext <2 x i8> undef to <2 x i16> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v7 = sext <2 x i8> undef to <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v8 = sext <2 x i8> undef to <2 x i64> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v9 = sext <2 x i16> undef to <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v10 = sext <2 x i16> undef to <2 x i64> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v11 = sext <2 x i32> undef to <2 x i64> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v12 = sext <4 x i8> undef to <4 x i16> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v13 = sext <4 x i8> undef to <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v14 = sext <4 x i8> undef to <4 x i64> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v15 = sext <4 x i16> undef to <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v16 = sext <4 x i16> undef to <4 x i64> +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v17 = sext <4 x i32> undef to <4 x i64> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v18 = sext <8 x i8> undef to <8 x i16> +; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v19 = sext <8 x i8> undef to <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 15 for instruction: %v20 = sext <8 x i8> undef to <8 x i64> +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v21 = sext <8 x i16> undef to <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 11 for instruction: %v22 = sext <8 x i16> undef to <8 x i64> +; CHECK: Cost 
Model: Found an estimated cost of 6 for instruction: %v23 = sext <8 x i32> undef to <8 x i64> +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v24 = sext <16 x i8> undef to <16 x i16> +; CHECK: Cost Model: Found an estimated cost of 11 for instruction: %v25 = sext <16 x i8> undef to <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 31 for instruction: %v26 = sext <16 x i8> undef to <16 x i64> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v27 = sext <16 x i16> undef to <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 22 for instruction: %v28 = sext <16 x i16> undef to <16 x i64> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v29 = sext <16 x i32> undef to <16 x i64> + + ret void +} + +define void @zext() { + %v0 = zext i8 undef to i16 + %v1 = zext i8 undef to i32 + %v2 = zext i8 undef to i64 + %v3 = zext i16 undef to i32 + %v4 = zext i16 undef to i64 + %v5 = zext i32 undef to i64 + %v6 = zext <2 x i8> undef to <2 x i16> + %v7 = zext <2 x i8> undef to <2 x i32> + %v8 = zext <2 x i8> undef to <2 x i64> + %v9 = zext <2 x i16> undef to <2 x i32> + %v10 = zext <2 x i16> undef to <2 x i64> + %v11 = zext <2 x i32> undef to <2 x i64> + %v12 = zext <4 x i8> undef to <4 x i16> + %v13 = zext <4 x i8> undef to <4 x i32> + %v14 = zext <4 x i8> undef to <4 x i64> + %v15 = zext <4 x i16> undef to <4 x i32> + %v16 = zext <4 x i16> undef to <4 x i64> + %v17 = zext <4 x i32> undef to <4 x i64> + %v18 = zext <8 x i8> undef to <8 x i16> + %v19 = zext <8 x i8> undef to <8 x i32> + %v20 = zext <8 x i8> undef to <8 x i64> + %v21 = zext <8 x i16> undef to <8 x i32> + %v22 = zext <8 x i16> undef to <8 x i64> + %v23 = zext <8 x i32> undef to <8 x i64> + %v24 = zext <16 x i8> undef to <16 x i16> + %v25 = zext <16 x i8> undef to <16 x i32> + %v26 = zext <16 x i8> undef to <16 x i64> + %v27 = zext <16 x i16> undef to <16 x i32> + %v28 = zext <16 x i16> undef to <16 x i64> + %v29 = zext <16 x i32> undef to <16 
x i64> + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v0 = zext i8 undef to i16 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v1 = zext i8 undef to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v2 = zext i8 undef to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v3 = zext i16 undef to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v4 = zext i16 undef to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v5 = zext i32 undef to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v6 = zext <2 x i8> undef to <2 x i16> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v7 = zext <2 x i8> undef to <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v8 = zext <2 x i8> undef to <2 x i64> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v9 = zext <2 x i16> undef to <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v10 = zext <2 x i16> undef to <2 x i64> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> undef to <2 x i64> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v12 = zext <4 x i8> undef to <4 x i16> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v13 = zext <4 x i8> undef to <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v14 = zext <4 x i8> undef to <4 x i64> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v15 = zext <4 x i16> undef to <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v16 = zext <4 x i16> undef to <4 x i64> +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v17 = zext <4 x i32> undef to <4 x i64> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v18 = zext <8 x i8> undef to <8 x i16> +; CHECK: 
Cost Model: Found an estimated cost of 5 for instruction: %v19 = zext <8 x i8> undef to <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 15 for instruction: %v20 = zext <8 x i8> undef to <8 x i64> +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v21 = zext <8 x i16> undef to <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 11 for instruction: %v22 = zext <8 x i16> undef to <8 x i64> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = zext <8 x i32> undef to <8 x i64> +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v24 = zext <16 x i8> undef to <16 x i16> +; CHECK: Cost Model: Found an estimated cost of 11 for instruction: %v25 = zext <16 x i8> undef to <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 31 for instruction: %v26 = zext <16 x i8> undef to <16 x i64> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v27 = zext <16 x i16> undef to <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 22 for instruction: %v28 = zext <16 x i16> undef to <16 x i64> +; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v29 = zext <16 x i32> undef to <16 x i64> + + ret void +} + +define void @trunc() { + %v0 = trunc i16 undef to i8 + %v1 = trunc i32 undef to i16 + %v2 = trunc i32 undef to i8 + %v3 = trunc i64 undef to i32 + %v4 = trunc i64 undef to i16 + %v5 = trunc i64 undef to i8 + %v6 = trunc <2 x i16> undef to <2 x i8> + %v7 = trunc <2 x i32> undef to <2 x i16> + %v8 = trunc <2 x i32> undef to <2 x i8> + %v9 = trunc <2 x i64> undef to <2 x i32> + %v10 = trunc <2 x i64> undef to <2 x i16> + %v11 = trunc <2 x i64> undef to <2 x i8> + %v12 = trunc <4 x i16> undef to <4 x i8> + %v13 = trunc <4 x i32> undef to <4 x i16> + %v14 = trunc <4 x i32> undef to <4 x i8> + %v15 = trunc <4 x i64> undef to <4 x i32> + %v16 = trunc <4 x i64> undef to <4 x i16> + %v17 = trunc <4 x i64> undef to <4 x i8> + %v18 = trunc <8 x i16> undef to <8 x i8> + %v19 = 
trunc <8 x i32> undef to <8 x i16> + %v20 = trunc <8 x i32> undef to <8 x i8> + %v21 = trunc <8 x i64> undef to <8 x i32> + %v22 = trunc <8 x i64> undef to <8 x i16> + %v23 = trunc <8 x i64> undef to <8 x i8> + %v24 = trunc <16 x i16> undef to <16 x i8> + %v25 = trunc <16 x i32> undef to <16 x i16> + %v26 = trunc <16 x i32> undef to <16 x i8> + %v27 = trunc <16 x i64> undef to <16 x i32> + %v28 = trunc <16 x i64> undef to <16 x i16> + %v29 = trunc <16 x i64> undef to <16 x i8> + +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v0 = trunc i16 undef to i8 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v1 = trunc i32 undef to i16 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v2 = trunc i32 undef to i8 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v3 = trunc i64 undef to i32 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v4 = trunc i64 undef to i16 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v5 = trunc i64 undef to i8 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v6 = trunc <2 x i16> undef to <2 x i8> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v7 = trunc <2 x i32> undef to <2 x i16> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v8 = trunc <2 x i32> undef to <2 x i8> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v9 = trunc <2 x i64> undef to <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v10 = trunc <2 x i64> undef to <2 x i16> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v11 = trunc <2 x i64> undef to <2 x i8> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v12 = trunc <4 x i16> undef to <4 x i8> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v13 = trunc <4 x i32> undef to <4 x i16> +; CHECK: Cost Model: Found an estimated cost of 1 for 
instruction: %v14 = trunc <4 x i32> undef to <4 x i8> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v15 = trunc <4 x i64> undef to <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v16 = trunc <4 x i64> undef to <4 x i16> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v17 = trunc <4 x i64> undef to <4 x i8> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v18 = trunc <8 x i16> undef to <8 x i8> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v19 = trunc <8 x i32> undef to <8 x i16> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v20 = trunc <8 x i32> undef to <8 x i8> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v21 = trunc <8 x i64> undef to <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v22 = trunc <8 x i64> undef to <8 x i16> +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v23 = trunc <8 x i64> undef to <8 x i8> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v24 = trunc <16 x i16> undef to <16 x i8> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %v25 = trunc <16 x i32> undef to <16 x i16> +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v26 = trunc <16 x i32> undef to <16 x i8> +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %v27 = trunc <16 x i64> undef to <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v28 = trunc <16 x i64> undef to <16 x i16> +; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v29 = trunc <16 x i64> undef to <16 x i8> + + ret void +} diff --git a/llvm/test/Analysis/CostModel/SystemZ/load_store.ll b/llvm/test/Analysis/CostModel/SystemZ/load_store.ll new file mode 100644 index 0000000..1ac9229 --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/load_store.ll @@ -0,0 +1,137 @@ +; RUN: opt < %s 
-cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s + +define void @store() { + store i8 undef, i8* undef + store i16 undef, i16* undef + store i32 undef, i32* undef + store i64 undef, i64* undef + store float undef, float* undef + store double undef, double* undef + store fp128 undef, fp128* undef + store <2 x i8> undef, <2 x i8>* undef + store <2 x i16> undef, <2 x i16>* undef + store <2 x i32> undef, <2 x i32>* undef + store <2 x i64> undef, <2 x i64>* undef + store <2 x float> undef, <2 x float>* undef + store <2 x double> undef, <2 x double>* undef + store <4 x i8> undef, <4 x i8>* undef + store <4 x i16> undef, <4 x i16>* undef + store <4 x i32> undef, <4 x i32>* undef + store <4 x i64> undef, <4 x i64>* undef + store <4 x float> undef, <4 x float>* undef + store <4 x double> undef, <4 x double>* undef + store <8 x i8> undef, <8 x i8>* undef + store <8 x i16> undef, <8 x i16>* undef + store <8 x i32> undef, <8 x i32>* undef + store <8 x i64> undef, <8 x i64>* undef + store <8 x float> undef, <8 x float>* undef + store <8 x double> undef, <8 x double>* undef + store <16 x i8> undef, <16 x i8>* undef + store <16 x i16> undef, <16 x i16>* undef + store <16 x i32> undef, <16 x i32>* undef + store <16 x i64> undef, <16 x i64>* undef + store <16 x float> undef, <16 x float>* undef + store <16 x double> undef, <16 x double>* undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef +; CHECK: Cost Model: Found an estimated 
cost of 2 for instruction: store fp128 undef, fp128* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> 
undef, <8 x double>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i64> undef, <16 x i64>* undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: store <16 x double> undef, <16 x double>* undef + + ret void; +} + +define void @load() { + load i8, i8* undef + load i16, i16* undef + load i32, i32* undef + load i64, i64* undef + load float, float* undef + load double, double* undef + load fp128, fp128* undef + load <2 x i8>, <2 x i8>* undef + load <2 x i16>, <2 x i16>* undef + load <2 x i32>, <2 x i32>* undef + load <2 x i64>, <2 x i64>* undef + load <2 x float>, <2 x float>* undef + load <2 x double>, <2 x double>* undef + load <4 x i8>, <4 x i8>* undef + load <4 x i16>, <4 x i16>* undef + load <4 x i32>, <4 x i32>* undef + load <4 x i64>, <4 x i64>* undef + load <4 x float>, <4 x float>* undef + load <4 x double>, <4 x double>* undef + load <8 x i8>, <8 x i8>* undef + load <8 x i16>, <8 x i16>* undef + load <8 x i32>, <8 x i32>* undef + load <8 x i64>, <8 x i64>* undef + load <8 x float>, <8 x float>* undef + load <8 x double>, <8 x double>* undef + load <16 x i8>, <16 x i8>* undef + load <16 x i16>, <16 x i16>* undef + load <16 x i32>, <16 x i32>* undef + load <16 x i64>, <16 x i64>* undef + load <16 x float>, <16 x float>* undef + load <16 x double>, <16 x double>* undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef +; CHECK: Cost 
Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = load float, float* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = load double, double* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %7 = load fp128, fp128* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, <2 x i8>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, <2 x i16>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, <2 x i32>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, <2 x i64>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x float>, <2 x float>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <2 x double>, <2 x double>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <4 x i8>, <4 x i8>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i16>, <4 x i16>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x i32>, <4 x i32>* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <4 x i64>, <4 x i64>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <4 x double>, <4 x double>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <8 x i8>, <8 x i8>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, <8 x i16>* undef +; CHECK: Cost 
Model: Found an estimated cost of 2 for instruction: %22 = load <8 x i32>, <8 x i32>* undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <8 x i64>, <8 x i64>* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %24 = load <8 x float>, <8 x float>* undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %25 = load <8 x double>, <8 x double>* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <16 x i8>, <16 x i8>* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <16 x i16>, <16 x i16>* undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %28 = load <16 x i32>, <16 x i32>* undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %29 = load <16 x i64>, <16 x i64>* undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %30 = load <16 x float>, <16 x float>* undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %31 = load <16 x double>, <16 x double>* undef + + ret void; +} diff --git a/llvm/test/Analysis/CostModel/SystemZ/logical.ll b/llvm/test/Analysis/CostModel/SystemZ/logical.ll new file mode 100644 index 0000000..41984e0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/logical.ll @@ -0,0 +1,277 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s + +define void @and() { + %res0 = and i8 undef, undef + %res1 = and i16 undef, undef + %res2 = and i32 undef, undef + %res3 = and i64 undef, undef + %res4 = and <2 x i8> undef, undef + %res5 = and <2 x i16> undef, undef + %res6 = and <2 x i32> undef, undef + %res7 = and <2 x i64> undef, undef + %res8 = and <4 x i8> undef, undef + %res9 = and <4 x i16> undef, undef + %res10 = and <4 x i32> undef, undef + %res11 = and <4 x i64> undef, undef + %res12 = and <8 x i8> undef, undef + %res13 = and <8 x i16> undef, undef + %res14 = and <8 x i32> undef, undef + %res15 = 
and <8 x i64> undef, undef + %res16 = and <16 x i8> undef, undef + %res17 = and <16 x i16> undef, undef + %res18 = and <16 x i32> undef, undef + %res19 = and <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = and i8 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = and i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = and i64 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = and <2 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = and <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = and <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res7 = and <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res8 = and <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res9 = and <4 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res10 = and <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res11 = and <4 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res12 = and <8 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res13 = and <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res14 = and <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res15 = and <8 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res16 = and <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res17 = and <16 
x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res18 = and <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res19 = and <16 x i64> undef, undef + + ret void; +} + +define void @ashr() { + %res0 = ashr i8 undef, undef + %res1 = ashr i16 undef, undef + %res2 = ashr i32 undef, undef + %res3 = ashr i64 undef, undef + %res4 = ashr <2 x i8> undef, undef + %res5 = ashr <2 x i16> undef, undef + %res6 = ashr <2 x i32> undef, undef + %res7 = ashr <2 x i64> undef, undef + %res8 = ashr <4 x i8> undef, undef + %res9 = ashr <4 x i16> undef, undef + %res10 = ashr <4 x i32> undef, undef + %res11 = ashr <4 x i64> undef, undef + %res12 = ashr <8 x i8> undef, undef + %res13 = ashr <8 x i16> undef, undef + %res14 = ashr <8 x i32> undef, undef + %res15 = ashr <8 x i64> undef, undef + %res16 = ashr <16 x i8> undef, undef + %res17 = ashr <16 x i16> undef, undef + %res18 = ashr <16 x i32> undef, undef + %res19 = ashr <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res0 = ashr i8 undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res1 = ashr i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = ashr i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = ashr i64 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = ashr <2 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = ashr <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = ashr <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res7 = ashr <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res8 = ashr <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: 
%res9 = ashr <4 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res10 = ashr <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res11 = ashr <4 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res12 = ashr <8 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res13 = ashr <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res14 = ashr <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res15 = ashr <8 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res16 = ashr <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res17 = ashr <16 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res18 = ashr <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res19 = ashr <16 x i64> undef, undef + + ret void; +} + +define void @lshr() { + %res0 = lshr i8 undef, undef + %res1 = lshr i16 undef, undef + %res2 = lshr i32 undef, undef + %res3 = lshr i64 undef, undef + %res4 = lshr <2 x i8> undef, undef + %res5 = lshr <2 x i16> undef, undef + %res6 = lshr <2 x i32> undef, undef + %res7 = lshr <2 x i64> undef, undef + %res8 = lshr <4 x i8> undef, undef + %res9 = lshr <4 x i16> undef, undef + %res10 = lshr <4 x i32> undef, undef + %res11 = lshr <4 x i64> undef, undef + %res12 = lshr <8 x i8> undef, undef + %res13 = lshr <8 x i16> undef, undef + %res14 = lshr <8 x i32> undef, undef + %res15 = lshr <8 x i64> undef, undef + %res16 = lshr <16 x i8> undef, undef + %res17 = lshr <16 x i16> undef, undef + %res18 = lshr <16 x i32> undef, undef + %res19 = lshr <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res0 = lshr i8 undef, undef +; CHECK: Cost Model: 
Found an estimated cost of 2 for instruction: %res1 = lshr i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = lshr i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = lshr i64 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = lshr <2 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = lshr <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = lshr <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res7 = lshr <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res8 = lshr <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res9 = lshr <4 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res10 = lshr <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res11 = lshr <4 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res12 = lshr <8 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res13 = lshr <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res14 = lshr <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res15 = lshr <8 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res16 = lshr <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res17 = lshr <16 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res18 = lshr <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res19 = lshr <16 x i64> undef, undef + + ret void; +} + +define void @or() { + %res0 = or 
i8 undef, undef + %res1 = or i16 undef, undef + %res2 = or i32 undef, undef + %res3 = or i64 undef, undef + %res4 = or <2 x i8> undef, undef + %res5 = or <2 x i16> undef, undef + %res6 = or <2 x i32> undef, undef + %res7 = or <2 x i64> undef, undef + %res8 = or <4 x i8> undef, undef + %res9 = or <4 x i16> undef, undef + %res10 = or <4 x i32> undef, undef + %res11 = or <4 x i64> undef, undef + %res12 = or <8 x i8> undef, undef + %res13 = or <8 x i16> undef, undef + %res14 = or <8 x i32> undef, undef + %res15 = or <8 x i64> undef, undef + %res16 = or <16 x i8> undef, undef + %res17 = or <16 x i16> undef, undef + %res18 = or <16 x i32> undef, undef + %res19 = or <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i8 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = or i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = or i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = or i64 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = or <2 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = or <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = or <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res7 = or <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res8 = or <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res9 = or <4 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res10 = or <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res11 = or <4 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res12 = or <8 x i8> undef, undef +; 
CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res13 = or <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res14 = or <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res15 = or <8 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res16 = or <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res17 = or <16 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res18 = or <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res19 = or <16 x i64> undef, undef + + ret void; +} + +define void @shl() { + %res0 = shl i8 undef, undef + %res1 = shl i16 undef, undef + %res2 = shl i32 undef, undef + %res3 = shl i64 undef, undef + %res4 = shl <2 x i8> undef, undef + %res5 = shl <2 x i16> undef, undef + %res6 = shl <2 x i32> undef, undef + %res7 = shl <2 x i64> undef, undef + %res8 = shl <4 x i8> undef, undef + %res9 = shl <4 x i16> undef, undef + %res10 = shl <4 x i32> undef, undef + %res11 = shl <4 x i64> undef, undef + %res12 = shl <8 x i8> undef, undef + %res13 = shl <8 x i16> undef, undef + %res14 = shl <8 x i32> undef, undef + %res15 = shl <8 x i64> undef, undef + %res16 = shl <16 x i8> undef, undef + %res17 = shl <16 x i16> undef, undef + %res18 = shl <16 x i32> undef, undef + %res19 = shl <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = shl i8 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = shl i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = shl i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = shl i64 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = shl <2 x i8> undef, undef +; CHECK: Cost Model: 
Found an estimated cost of 1 for instruction: %res5 = shl <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = shl <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res7 = shl <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res8 = shl <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res9 = shl <4 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res10 = shl <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res11 = shl <4 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res12 = shl <8 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res13 = shl <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res14 = shl <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res15 = shl <8 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res16 = shl <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res17 = shl <16 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res18 = shl <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res19 = shl <16 x i64> undef, undef + + ret void; +} + +define void @xor() { + %res0 = xor i8 undef, undef + %res1 = xor i16 undef, undef + %res2 = xor i32 undef, undef + %res3 = xor i64 undef, undef + %res4 = xor <2 x i8> undef, undef + %res5 = xor <2 x i16> undef, undef + %res6 = xor <2 x i32> undef, undef + %res7 = xor <2 x i64> undef, undef + %res8 = xor <4 x i8> undef, undef + %res9 = xor <4 x i16> undef, undef + %res10 = xor <4 x i32> undef, undef + %res11 = xor <4 x i64> undef, undef + 
%res12 = xor <8 x i8> undef, undef + %res13 = xor <8 x i16> undef, undef + %res14 = xor <8 x i32> undef, undef + %res15 = xor <8 x i64> undef, undef + %res16 = xor <16 x i8> undef, undef + %res17 = xor <16 x i16> undef, undef + %res18 = xor <16 x i32> undef, undef + %res19 = xor <16 x i64> undef, undef + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = xor i8 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = xor i16 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i32 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i64 undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor <2 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = xor <2 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = xor <2 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res7 = xor <2 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res8 = xor <4 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res9 = xor <4 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res10 = xor <4 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res11 = xor <4 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res12 = xor <8 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res13 = xor <8 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res14 = xor <8 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res15 = xor <8 x i64> undef, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: 
%res16 = xor <16 x i8> undef, undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res17 = xor <16 x i16> undef, undef +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res18 = xor <16 x i32> undef, undef +; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res19 = xor <16 x i64> undef, undef + + ret void; +} diff --git a/llvm/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll b/llvm/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll new file mode 100644 index 0000000..1b6a50d --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll @@ -0,0 +1,259 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; +; Test that loads into operations that can fold one memory operand get zero +; cost. In the case that both operands are loaded, one load should get a cost +; value. + +define void @add() { + %li32 = load i32, i32* undef + add i32 %li32, undef + + %li32_0 = load i32, i32* undef + %li32_1 = load i32, i32* undef + add i32 %li32_0, %li32_1 + + %li64 = load i64, i64* undef + add i64 %li64, undef + + %li64_0 = load i64, i64* undef + %li64_1 = load i64, i64* undef + add i64 %li64_0, %li64_1 + + ret void; + +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = add i32 %li32, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = add i32 %li32_0, %li32_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = add i64 %li64, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: 
%li64_0 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = add i64 %li64_0, %li64_1 +} + +define void @sub() { + %li32 = load i32, i32* undef + sub i32 %li32, undef + + %li32_0 = load i32, i32* undef + %li32_1 = load i32, i32* undef + sub i32 %li32_0, %li32_1 + + %li64 = load i64, i64* undef + sub i64 %li64, undef + + %li64_0 = load i64, i64* undef + %li64_1 = load i64, i64* undef + sub i64 %li64_0, %li64_1 + + ret void; + +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = sub i32 %li32, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = sub i32 %li32_0, %li32_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = sub i64 %li64, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = sub i64 %li64_0, %li64_1 +} + +define void @mul() { + %li32 = load i32, i32* undef + mul i32 %li32, undef + + %li32_0 = load i32, i32* undef + %li32_1 = load i32, i32* undef + mul i32 %li32_0, %li32_1 + + %li64 = load i64, i64* undef + mul i64 %li64, undef + + %li64_0 = load i64, i64* undef + %li64_1 = load i64, i64* undef + mul i64 %li64_0, %li64_1 + + ret void; +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef +; CHECK: Cost 
Model: Found an estimated cost of 1 for instruction: %1 = mul i32 %li32, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = mul i32 %li32_0, %li32_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = mul i64 %li64, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = mul i64 %li64_0, %li64_1 +} + +define void @sdiv() { + %li32 = load i32, i32* undef + sdiv i32 %li32, undef + + %li32_0 = load i32, i32* undef + %li32_1 = load i32, i32* undef + sdiv i32 %li32_0, %li32_1 + + %li64 = load i64, i64* undef + sdiv i64 %li64, undef + + %li64_0 = load i64, i64* undef + %li64_1 = load i64, i64* undef + sdiv i64 %li64_0, %li64_1 + + ret void; +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i32 %li32, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %2 = sdiv i32 %li32_0, %li32_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = sdiv i64 %li64, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef +; CHECK: 
Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = sdiv i64 %li64_0, %li64_1 +} + +define void @udiv() { + %li32 = load i32, i32* undef + udiv i32 %li32, undef + + %li32_0 = load i32, i32* undef + %li32_1 = load i32, i32* undef + udiv i32 %li32_0, %li32_1 + + %li64 = load i64, i64* undef + udiv i64 %li64, undef + + %li64_0 = load i64, i64* undef + %li64_1 = load i64, i64* undef + udiv i64 %li64_0, %li64_1 + + ret void; +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %1 = udiv i32 %li32, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %2 = udiv i32 %li32_0, %li32_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %3 = udiv i64 %li64, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %4 = udiv i64 %li64_0, %li64_1 +} + +define void @and() { + %li32 = load i32, i32* undef + and i32 %li32, undef + + %li32_0 = load i32, i32* undef + %li32_1 = load i32, i32* undef + and i32 %li32_0, %li32_1 + + %li64 = load i64, i64* undef + and i64 %li64, undef + + %li64_0 = load i64, i64* undef + %li64_1 = load i64, i64* undef + and i64 %li64_0, %li64_1 + + ret void; +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 
1 for instruction: %1 = and i32 %li32, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = and i32 %li32_0, %li32_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = and i64 %li64, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = and i64 %li64_0, %li64_1 +} + +define void @or() { + %li32 = load i32, i32* undef + or i32 %li32, undef + + %li32_0 = load i32, i32* undef + %li32_1 = load i32, i32* undef + or i32 %li32_0, %li32_1 + + %li64 = load i64, i64* undef + or i64 %li64, undef + + %li64_0 = load i64, i64* undef + %li64_1 = load i64, i64* undef + or i64 %li64_0, %li64_1 + + ret void; +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = or i32 %li32, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = or i32 %li32_0, %li32_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = or i64 %li64, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for 
instruction: %li64_1 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = or i64 %li64_0, %li64_1 +} + +define void @xor() { + %li32 = load i32, i32* undef + xor i32 %li32, undef + + %li32_0 = load i32, i32* undef + %li32_1 = load i32, i32* undef + xor i32 %li32_0, %li32_1 + + %li64 = load i64, i64* undef + xor i64 %li64, undef + + %li64_0 = load i64, i64* undef + %li64_1 = load i64, i64* undef + xor i64 %li64_0, %li64_1 + + ret void; +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = xor i32 %li32, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = xor i32 %li32_0, %li32_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = xor i64 %li64, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = xor i64 %li64_0, %li64_1 +} + +define void @icmp() { + %li32 = load i32, i32* undef + icmp eq i32 %li32, undef + + %li32_0 = load i32, i32* undef + %li32_1 = load i32, i32* undef + icmp eq i32 %li32_0, %li32_1 + + %li64 = load i64, i64* undef + icmp eq i64 %li64, undef + + %li64_0 = load i64, i64* undef + %li64_1 = load i64, i64* undef + icmp eq i64 %li64_0, %li64_1 + + ret void; +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = icmp eq i32 
%li32, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = icmp eq i32 %li32_0, %li32_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = icmp eq i64 %li64, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = icmp eq i64 %li64_0, %li64_1 +} diff --git a/llvm/test/Analysis/CostModel/SystemZ/scalar-cmp-cmp-log-sel.ll b/llvm/test/Analysis/CostModel/SystemZ/scalar-cmp-cmp-log-sel.ll new file mode 100644 index 0000000..9ba9807 --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/scalar-cmp-cmp-log-sel.ll @@ -0,0 +1,1624 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; +; TODO: add more tests for differing operand types of the two compares. 
+ +define i8 @fun0(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + i8 %val5, i8 %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun0 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun1(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + i16 %val5, i16 %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun1 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun2(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + i32 %val5, i32 %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun2 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun3(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + i64 %val5, i64 %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun3 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: 
%cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun4(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + float %val5, float %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun4 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun5(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + double %val5, double %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun5 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun6(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + i8 %val5, i8 %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun6 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun7(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + i16 %val5, i16 %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = 
icmp eq i16 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun7 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun8(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + i32 %val5, i32 %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun8 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun9(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + i64 %val5, i64 %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun9 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun10(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + float %val5, float %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun10 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and 
i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun11(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + double %val5, double %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun11 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun12(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + i8 %val5, i8 %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun12 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun13(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + i16 %val5, i16 %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun13 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun14(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + i32 %val5, i32 %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 
%and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun14 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun15(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + i64 %val5, i64 %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun15 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun16(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + float %val5, float %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun16 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun17(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + double %val5, double %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun17 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 4 for 
instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun18(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + i8 %val5, i8 %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun18 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun19(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + i16 %val5, i16 %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun19 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun20(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + i32 %val5, i32 %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun20 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun21(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + i64 %val5, i64 %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun21 
+; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun22(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + float %val5, float %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun22 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun23(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + double %val5, double %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun23 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun24(float %val1, float %val2, float %val3, float %val4, + i8 %val5, i8 %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun24 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, 
i8 %val6 +} + +define i16 @fun25(float %val1, float %val2, float %val3, float %val4, + i16 %val5, i16 %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun25 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun26(float %val1, float %val2, float %val3, float %val4, + i32 %val5, i32 %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun26 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun27(float %val1, float %val2, float %val3, float %val4, + i64 %val5, i64 %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun27 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun28(float %val1, float %val2, float %val3, float %val4, + float %val5, float %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float 
%val6 + ret float %sel + +; CHECK: fun28 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun29(float %val1, float %val2, float %val3, float %val4, + double %val5, double %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun29 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun30(double %val1, double %val2, double %val3, double %val4, + i8 %val5, i8 %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun30 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun31(double %val1, double %val2, double %val3, double %val4, + i16 %val5, i16 %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun31 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 1 for instruction: %and = 
and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun32(double %val1, double %val2, double %val3, double %val4, + i32 %val5, i32 %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun32 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun33(double %val1, double %val2, double %val3, double %val4, + i64 %val5, i64 %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun33 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun34(double %val1, double %val2, double %val3, double %val4, + float %val5, float %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun34 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun35(double %val1, double %val2, double %val3, double %val4, + double %val5, double %val6) { + %cmp0 = 
fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = and i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun35 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 1 for instruction: %and = and i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun36(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + i8 %val5, i8 %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun36 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun37(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + i16 %val5, i16 %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun37 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun38(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + i32 %val5, i32 %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun38 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 1 for 
instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun39(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + i64 %val5, i64 %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun39 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun40(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + float %val5, float %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun40 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun41(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + double %val5, double %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun41 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun42(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + i8 %val5, i8 %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = 
select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun42 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun43(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + i16 %val5, i16 %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun43 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun44(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + i32 %val5, i32 %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun44 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun45(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + i64 %val5, i64 %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun45 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 
%val5, i64 %val6 +} + +define float @fun46(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + float %val5, float %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun46 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun47(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + double %val5, double %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun47 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun48(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + i8 %val5, i8 %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun48 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun49(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + i16 %val5, i16 %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun49 +; CHECK: cost 
of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun50(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + i32 %val5, i32 %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun50 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun51(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + i64 %val5, i64 %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun51 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun52(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + float %val5, float %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun52 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun53(i32 %val1, i32 %val2, 
i32 %val3, i32 %val4, + double %val5, double %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun53 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun54(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + i8 %val5, i8 %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun54 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun55(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + i16 %val5, i16 %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun55 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun56(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + i32 %val5, i32 %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun56 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for 
instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun57(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + i64 %val5, i64 %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun57 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun58(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + float %val5, float %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun58 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun59(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + double %val5, double %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun59 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun60(float %val1, float %val2, float %val3, float %val4, + i8 %val5, i8 %val6) { + 
%cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun60 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun61(float %val1, float %val2, float %val3, float %val4, + i16 %val5, i16 %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun61 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun62(float %val1, float %val2, float %val3, float %val4, + i32 %val5, i32 %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun62 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun63(float %val1, float %val2, float %val3, float %val4, + i64 %val5, i64 %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun63 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for 
instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun64(float %val1, float %val2, float %val3, float %val4, + float %val5, float %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun64 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun65(float %val1, float %val2, float %val3, float %val4, + double %val5, double %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun65 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun66(double %val1, double %val2, double %val3, double %val4, + i8 %val5, i8 %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun66 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun67(double %val1, 
double %val2, double %val3, double %val4, + i16 %val5, i16 %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun67 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun68(double %val1, double %val2, double %val3, double %val4, + i32 %val5, i32 %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun68 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun69(double %val1, double %val2, double %val3, double %val4, + i64 %val5, i64 %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun69 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun70(double %val1, double %val2, double %val3, double %val4, + float %val5, float %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + 
+; CHECK: fun70 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun71(double %val1, double %val2, double %val3, double %val4, + double %val5, double %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = or i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun71 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 1 for instruction: %and = or i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun72(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + i8 %val5, i8 %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun72 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun73(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + i16 %val5, i16 %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun73 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, 
i16 %val6 +} + +define i32 @fun74(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + i32 %val5, i32 %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun74 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun75(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + i64 %val5, i64 %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun75 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun76(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + float %val5, float %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun76 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun77(i8 %val1, i8 %val2, i8 %val3, i8 %val4, + double %val5, double %val6) { + %cmp0 = icmp eq i8 %val1, %val2 + %cmp1 = icmp eq i8 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun77 +; CHECK: cost of 3 for instruction: %cmp0 
= icmp eq i8 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i8 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun78(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + i8 %val5, i8 %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun78 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun79(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + i16 %val5, i16 %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun79 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun80(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + i32 %val5, i32 %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun80 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun81(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + i64 %val5, i64 
%val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun81 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun82(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + float %val5, float %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun82 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun83(i16 %val1, i16 %val2, i16 %val3, i16 %val4, + double %val5, double %val6) { + %cmp0 = icmp eq i16 %val1, %val2 + %cmp1 = icmp eq i16 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun83 +; CHECK: cost of 3 for instruction: %cmp0 = icmp eq i16 %val1, %val2 +; CHECK: cost of 3 for instruction: %cmp1 = icmp eq i16 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun84(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + i8 %val5, i8 %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun84 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: 
%cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun85(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + i16 %val5, i16 %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun85 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun86(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + i32 %val5, i32 %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun86 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun87(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + i64 %val5, i64 %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun87 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun88(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + float %val5, float %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq 
i32 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun88 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun89(i32 %val1, i32 %val2, i32 %val3, i32 %val4, + double %val5, double %val6) { + %cmp0 = icmp eq i32 %val1, %val2 + %cmp1 = icmp eq i32 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun89 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i32 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i32 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun90(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + i8 %val5, i8 %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun90 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun91(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + i16 %val5, i16 %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun91 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 7 for instruction: 
%and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun92(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + i32 %val5, i32 %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun92 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun93(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + i64 %val5, i64 %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun93 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun94(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + float %val5, float %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun94 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun95(i64 %val1, i64 %val2, i64 %val3, i64 %val4, + double %val5, double %val6) { + %cmp0 = icmp eq i64 %val1, %val2 + %cmp1 = icmp eq i64 %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + 
%sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun95 +; CHECK: cost of 1 for instruction: %cmp0 = icmp eq i64 %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = icmp eq i64 %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun96(float %val1, float %val2, float %val3, float %val4, + i8 %val5, i8 %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun96 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun97(float %val1, float %val2, float %val3, float %val4, + i16 %val5, i16 %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun97 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun98(float %val1, float %val2, float %val3, float %val4, + i32 %val5, i32 %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun98 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 7 for instruction: %and = 
xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun99(float %val1, float %val2, float %val3, float %val4, + i64 %val5, i64 %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun99 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun100(float %val1, float %val2, float %val3, float %val4, + float %val5, float %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun100 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun101(float %val1, float %val2, float %val3, float %val4, + double %val5, double %val6) { + %cmp0 = fcmp ogt float %val1, %val2 + %cmp1 = fcmp ogt float %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun101 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt float %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt float %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + +define i8 @fun102(double %val1, double %val2, double %val3, double %val4, + i8 %val5, i8 %val6) { + %cmp0 = fcmp ogt 
double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i8 %val5, i8 %val6 + ret i8 %sel + +; CHECK: fun102 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i8 %val5, i8 %val6 +} + +define i16 @fun103(double %val1, double %val2, double %val3, double %val4, + i16 %val5, i16 %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i16 %val5, i16 %val6 + ret i16 %sel + +; CHECK: fun103 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i16 %val5, i16 %val6 +} + +define i32 @fun104(double %val1, double %val2, double %val3, double %val4, + i32 %val5, i32 %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i32 %val5, i32 %val6 + ret i32 %sel + +; CHECK: fun104 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i32 %val5, i32 %val6 +} + +define i64 @fun105(double %val1, double %val2, double %val3, double %val4, + i64 %val5, i64 %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, i64 %val5, i64 %val6 + ret i64 %sel + +; CHECK: fun105 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 
+; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 1 for instruction: %sel = select i1 %and, i64 %val5, i64 %val6 +} + +define float @fun106(double %val1, double %val2, double %val3, double %val4, + float %val5, float %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, float %val5, float %val6 + ret float %sel + +; CHECK: fun106 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, float %val5, float %val6 +} + +define double @fun107(double %val1, double %val2, double %val3, double %val4, + double %val5, double %val6) { + %cmp0 = fcmp ogt double %val1, %val2 + %cmp1 = fcmp ogt double %val3, %val4 + %and = xor i1 %cmp0, %cmp1 + %sel = select i1 %and, double %val5, double %val6 + ret double %sel + +; CHECK: fun107 +; CHECK: cost of 1 for instruction: %cmp0 = fcmp ogt double %val1, %val2 +; CHECK: cost of 1 for instruction: %cmp1 = fcmp ogt double %val3, %val4 +; CHECK: cost of 7 for instruction: %and = xor i1 %cmp0, %cmp1 +; CHECK: cost of 4 for instruction: %sel = select i1 %and, double %val5, double %val6 +} + diff --git a/llvm/test/Analysis/CostModel/SystemZ/shuffle.ll b/llvm/test/Analysis/CostModel/SystemZ/shuffle.ll new file mode 100644 index 0000000..e40dc1f --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/shuffle.ll @@ -0,0 +1,112 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s + +; CHECK: shuffle +define void @shuffle() { + + ;; Reverse shuffles + shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> + shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + + shufflevector <8 x i16> undef, <8 x 
i16> undef, <8 x i32> + shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + + shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> + shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + + shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> + shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + + shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> + shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> + + ;; Alternate shuffles + shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> + shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> + + shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> + shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> + + shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> + shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> + + shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> + shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> + + shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> + shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> + + ;; Broadcast shuffles + shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer + shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> zeroinitializer + + shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer + shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer + + shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer + shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer + + shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer + shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer + + shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer + shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer + + ;; Random shuffles + shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> + 
shufflevector <18 x i8> undef, <18 x i8> undef, <18 x i32> + + shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> + shufflevector <12 x i16> undef, <12 x i16> undef, <12 x i32> + + shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> + shufflevector <6 x i32> undef, <6 x i32> undef, <6 x i32> + + shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> + shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> + + shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> + shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> + + ret void + +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %11 = shufflevector <16 x i8> undef, 
<16 x i8> undef, <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %12 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %14 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %16 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %17 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %18 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %19 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %20 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %21 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %22 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> zeroinitializer +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %23 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %24 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %25 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: Cost Model: 
Found an estimated cost of 1 for instruction: %26 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %27 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %28 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %29 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %30 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %31 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %32 = shufflevector <18 x i8> undef, <18 x i8> undef, <18 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %33 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %34 = shufflevector <12 x i16> undef, <12 x i16> undef, <12 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %35 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %36 = shufflevector <6 x i32> undef, <6 x i32> undef, <6 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %37 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %38 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %39 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: 
%40 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +} diff --git a/llvm/test/Analysis/CostModel/SystemZ/vectorinstrs.ll b/llvm/test/Analysis/CostModel/SystemZ/vectorinstrs.ll new file mode 100644 index 0000000..b557076 --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/vectorinstrs.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s + +; CHECK: vecinstrs +define void @vecinstrs() { + + ;; Extract element is penalized somewhat with a cost of 2 for index 0. + extractelement <16 x i8> undef, i32 0 + extractelement <16 x i8> undef, i32 1 + + extractelement <8 x i16> undef, i32 0 + extractelement <8 x i16> undef, i32 1 + + extractelement <4 x i32> undef, i32 0 + extractelement <4 x i32> undef, i32 1 + + extractelement <2 x i64> undef, i32 0 + extractelement <2 x i64> undef, i32 1 + + extractelement <2 x double> undef, i32 0 + extractelement <2 x double> undef, i32 1 + + ; Extraction of i1 means extract + test under mask before branch. + extractelement <2 x i1> undef, i32 0 + extractelement <4 x i1> undef, i32 1 + extractelement <8 x i1> undef, i32 2 + + ;; Insert element + insertelement <16 x i8> undef, i8 undef, i32 0 + insertelement <8 x i16> undef, i16 undef, i32 0 + insertelement <4 x i32> undef, i32 undef, i32 0 + + ; vlvgp will do two grs into a vector register: only add cost half of the time. 
+ insertelement <2 x i64> undef, i64 undef, i32 0 + insertelement <2 x i64> undef, i64 undef, i32 1 + + ret void + +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <16 x i8> undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = extractelement <16 x i8> undef, i32 1 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %3 = extractelement <8 x i16> undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = extractelement <8 x i16> undef, i32 1 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %5 = extractelement <4 x i32> undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = extractelement <4 x i32> undef, i32 1 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %7 = extractelement <2 x i64> undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = extractelement <2 x i64> undef, i32 1 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = extractelement <2 x double> undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = extractelement <2 x double> undef, i32 1 +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %11 = extractelement <2 x i1> undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %12 = extractelement <4 x i1> undef, i32 1 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %13 = extractelement <8 x i1> undef, i32 2 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %14 = insertelement <16 x i8> undef, i8 undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %15 = insertelement <8 x i16> undef, i16 undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %16 = insertelement <4 x i32> undef, i32 undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: 
%17 = insertelement <2 x i64> undef, i64 undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %18 = insertelement <2 x i64> undef, i64 undef, i32 1 +} diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll new file mode 100644 index 0000000..5c15ee4 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll @@ -0,0 +1,70 @@ +; REQUIRES: asserts +; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \ +; RUN: -force-vector-width=4 -debug-only=loop-vectorize \ +; RUN: -disable-output < %s 2>&1 | FileCheck %s +; +; Check that the loop vectorizer performs memory interleaving with accurate +; cost estimations. + + +; Simple case where just the load is interleaved, because the store group +; would have gaps. +define void @fun0(i32* %data, i64 %n) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %tmp0 = getelementptr inbounds i32, i32* %data, i64 %i + %tmp1 = load i32, i32* %tmp0, align 4 + %tmp2 = add i32 %tmp1, 1 + store i32 %tmp2, i32* %tmp0, align 4 + %i.next = add nuw nsw i64 %i, 2 + %cond = icmp slt i64 %i.next, %n + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void + +; CHECK: LV: Creating an interleave group with: %tmp1 = load i32, i32* %tmp0, align 4 +; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction: %tmp1 = load i32, i32* %tmp0, align 4 +; (vl; vl; vperm) +} + +; Interleaving of both load and stores. 
+define void @fun1(i32* %data, i64 %n) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %tmp0 = getelementptr inbounds i32, i32* %data, i64 %i + %tmp1 = load i32, i32* %tmp0, align 4 + %i_1 = add i64 %i, 1 + %tmp2 = getelementptr inbounds i32, i32* %data, i64 %i_1 + %tmp3 = load i32, i32* %tmp2, align 4 + store i32 %tmp1, i32* %tmp2, align 4 + store i32 %tmp3, i32* %tmp0, align 4 + %i.next = add nuw nsw i64 %i, 2 + %cond = icmp slt i64 %i.next, %n + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void + +; CHECK: LV: Creating an interleave group with: store i32 %tmp3, i32* %tmp0, align 4 +; CHECK: LV: Inserted: store i32 %tmp1, i32* %tmp2, align 4 +; CHECK: into the interleave group with store i32 %tmp3, i32* %tmp0, align 4 +; CHECK: LV: Creating an interleave group with: %tmp3 = load i32, i32* %tmp2, align 4 +; CHECK: LV: Inserted: %tmp1 = load i32, i32* %tmp0, align 4 +; CHECK: into the interleave group with %tmp3 = load i32, i32* %tmp2, align 4 + +; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: %tmp1 = load i32, i32* %tmp0, align 4 +; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp2, align 4 +; (vl; vl; vperm, vpkg) + +; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp1, i32* %tmp2, align 4 +; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %tmp3, i32* %tmp0, align 4 +; (vmrlf; vmrhf; vst; vst) +} + -- 2.7.4