From b06c55a6986e0e1d571663eec507664013b22f00 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 16 Apr 2021 15:25:23 +0300 Subject: [PATCH] [X86][CostModel] Fix cost model for non-power-of-two vector load/stores Sometimes LV has to produce really wide vectors, and sometimes their element counts end up not being powers of two. As can be seen from the diff, the cost computation is currently completely nonsensical in those cases. Instead of just scalarizing everything, split/factorize the wide vector into a number of subvectors, each one having a power-of-two number of elements, and recurse to get the cost of the op on each subvector. Also, check how we'd legalize this subvector, and if the legalized type is scalar, also account for the scalarization cost. Note that for subvector loads we might be able to do better when the vectors are properly aligned. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D100099 --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 62 ++-- llvm/test/Analysis/CostModel/X86/load_store.ll | 384 ++++++++++++------------- 2 files changed, 226 insertions(+), 220 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index fa3f97e..9a17077 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3209,42 +3209,48 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, return TTI::TCC_Basic; } - // Handle non-power-of-two vectors such as <3 x float> - if (auto *VTy = dyn_cast(Src)) { - unsigned NumElem = VTy->getNumElements(); + assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && + "Invalid Opcode"); + // Type legalization can't handle structs + if (TLI->getValueType(DL, Src, true) == MVT::Other) + return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); - // Handle a few common cases: - // <3 x float> - if (NumElem == 3 && VTy->getScalarSizeInBits() == 32) - // 
Cost = 64 bit store + extract + 32 bit store. - return 3; + // Handle non-power-of-two vectors such as <3 x float> and <48 x i16> + if (auto *VTy = dyn_cast(Src)) { + const unsigned NumElem = VTy->getNumElements(); + if (!isPowerOf2_32(NumElem)) { + // Factorize NumElem into sum of power-of-two. + InstructionCost Cost = 0; + unsigned NumElemDone = 0; + for (unsigned NumElemLeft = NumElem, Factor; + Factor = PowerOf2Floor(NumElemLeft), NumElemLeft > 0; + NumElemLeft -= Factor) { + Type *SubTy = FixedVectorType::get(VTy->getScalarType(), Factor); + unsigned SubTyBytes = SubTy->getPrimitiveSizeInBits() / 8; - // <3 x double> - if (NumElem == 3 && VTy->getScalarSizeInBits() == 64) - // Cost = 128 bit store + unpack + 64 bit store. - return 3; + Cost += + getMemoryOpCost(Opcode, SubTy, Alignment, AddressSpace, CostKind); + + std::pair LST = TLI->getTypeLegalizationCost(DL, SubTy); + if (!LST.second.isVector()) { + APInt DemandedElts = + APInt::getBitsSet(NumElem, NumElemDone, NumElemDone + Factor); + Cost += getScalarizationOverhead(VTy, DemandedElts, + Opcode == Instruction::Load, + Opcode == Instruction::Store); + } - // Assume that all other non-power-of-two numbers are scalarized. - if (!isPowerOf2_32(NumElem)) { - APInt DemandedElts = APInt::getAllOnesValue(NumElem); - InstructionCost Cost = BaseT::getMemoryOpCost( - Opcode, VTy->getScalarType(), Alignment, AddressSpace, CostKind); - int SplitCost = getScalarizationOverhead(VTy, DemandedElts, - Opcode == Instruction::Load, - Opcode == Instruction::Store); - return NumElem * Cost + SplitCost; + NumElemDone += Factor; + Alignment = commonAlignment(Alignment.valueOrOne(), SubTyBytes); + } + assert(NumElemDone == NumElem && "Processed wrong element count?"); + return Cost; } } - // Type legalization can't handle structs - if (TLI->getValueType(DL, Src, true) == MVT::Other) - return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, - CostKind); - // Legalize the type. 
std::pair LT = TLI->getTypeLegalizationCost(DL, Src); - assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && - "Invalid Opcode"); // Each load/store unit costs 1. int Cost = LT.first * 1; diff --git a/llvm/test/Analysis/CostModel/X86/load_store.ll b/llvm/test/Analysis/CostModel/X86/load_store.ll index 9bb0280..2614043 100644 --- a/llvm/test/Analysis/CostModel/X86/load_store.ll +++ b/llvm/test/Analysis/CostModel/X86/load_store.ll @@ -20,23 +20,23 @@ define i32 @stores(i32 %arg) { ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an 
estimated cost of 14 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x 
i16> undef, <12 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'stores' @@ -54,21 +54,21 @@ define i32 @stores(i32 %arg) { ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <6 x i16> undef, <6 
x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 117 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 122 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: 
Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'stores' @@ -86,21 +86,21 @@ define i32 @stores(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 -; AVX512-NEXT: 
Cost Model: Found an estimated cost of 10 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 61 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 125 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 +; 
AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; store i8 undef, i8* undef, align 4 @@ -158,23 +158,23 @@ define i32 @stores_align(i32 %arg) { ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x 
double> undef, <3 x double>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: store <47 x i16> undef, <47 x i16>* 
undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'stores_align' @@ -192,21 +192,21 @@ define i32 @stores_align(i32 %arg) { ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 -; 
AVX-NEXT: Cost Model: Found an estimated cost of 117 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 122 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'stores_align' @@ -224,21 +224,21 @@ define i32 @stores_align(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 61 for instruction: store <23 x i16> undef, <23 
x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 125 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x 
i16> undef, <24 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; store i8 undef, i8* undef, align 64 @@ -295,24 +295,24 @@ define i32 @loads(i32 %arg) { ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %19 = 
load <6 x i16>, <6 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 +; SSE-NEXT: 
Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'loads' @@ -328,23 +328,23 @@ define i32 @loads(i32 %arg) { ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost 
of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %28 = load <48 x i16>, <48 x 
i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret i32 undef ; ; AVX512-LABEL: 'loads' @@ -360,23 +360,23 @@ define i32 @loads(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found 
an estimated cost of 50 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; load i8, i8* undef, align 4 @@ -433,24 +433,24 @@ define i32 @loads_align(i32 %arg) { ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated 
cost of 12 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %17 = load <5 
x i64>, <5 x i64>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'loads_align' @@ -466,23 +466,23 @@ define i32 @loads_align(i32 %arg) { ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 
x float>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 -; AVX-NEXT: 
Cost Model: Found an estimated cost of 102 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'loads_align' @@ -498,23 +498,23 @@ define i32 @loads_align(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 -; AVX512-NEXT: 
Cost Model: Found an estimated cost of 29 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 +; AVX512-NEXT: 
Cost Model: Found an estimated cost of 7 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; load i8, i8* undef, align 64 -- 2.7.4