From 83552e8c72b8c41d7a07fbd4bf859ece3eb55099 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 6 Sep 2022 11:58:29 +0100 Subject: [PATCH] [CostModel][X86] Add CostKinds handling for SSE FCMP_ONE/FCMP_UEQ predicates These require special handling to account for their expansion in lowering. I'm trying very hard not to have to add predicate specific costs - but it might be inevitable..... --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 13 ++++- llvm/test/Analysis/CostModel/X86/fcmp-codesize.ll | 32 +++++------ llvm/test/Analysis/CostModel/X86/fcmp-latency.ll | 32 +++++------ .../Analysis/CostModel/X86/fcmp-sizelatency.ll | 32 +++++------ llvm/test/Analysis/CostModel/X86/fcmp.ll | 64 +++++++++++----------- 5 files changed, 92 insertions(+), 81 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 8b737b6..538e724 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2867,7 +2867,6 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Pred == CmpInst::BAD_FCMP_PREDICATE)) Pred = cast(I)->getPredicate(); - // TODO: Handle pre-AVX FCMP_ONE/FCMP_UEQ slow cases. switch (Pred) { case CmpInst::Predicate::ICMP_NE: // xor(cmpeq(x,y),-1) @@ -2896,6 +2895,18 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, ExtraCost = 3; } break; + case CmpInst::Predicate::FCMP_ONE: + case CmpInst::Predicate::FCMP_UEQ: + // Without AVX we need to expand FCMP_ONE/FCMP_UEQ cases. + // Use FCMP_UEQ expansion - FCMP_ONE should be the same. + if (CondTy && !ST->hasAVX()) + return getCmpSelInstrCost(Opcode, ValTy, CondTy, + CmpInst::Predicate::FCMP_UNO, CostKind) + + getCmpSelInstrCost(Opcode, ValTy, CondTy, + CmpInst::Predicate::FCMP_OEQ, CostKind) + + getArithmeticInstrCost(Instruction::Or, CondTy, CostKind); + + break; case CmpInst::Predicate::BAD_ICMP_PREDICATE: case CmpInst::Predicate::BAD_FCMP_PREDICATE: // Assume worst case scenario and add the maximum extra cost. diff --git a/llvm/test/Analysis/CostModel/X86/fcmp-codesize.ll b/llvm/test/Analysis/CostModel/X86/fcmp-codesize.ll index e1e8ff8..1cb0f07 100644 --- a/llvm/test/Analysis/CostModel/X86/fcmp-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/fcmp-codesize.ll @@ -71,15 +71,15 @@ define i32 @cmp_float_oeq(i32 %arg) { define i32 @cmp_float_one(i32 %arg) { ; SSE-LABEL: 'cmp_float_one' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp one <2 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp one <4 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp one <8 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp one <16 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp one <2 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp one <4 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp one <8 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp one <16 x float> undef, undef ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp one <2 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp one <4 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp one <8 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp one <16 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp one <2 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp one <4 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp one <8 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp one <16 x double> undef, undef ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'cmp_float_one' @@ -401,15 +401,15 @@ define i32 @cmp_float_olt(i32 %arg) { define i32 @cmp_float_ueq(i32 %arg) { ; SSE-LABEL: 'cmp_float_ueq' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'cmp_float_ueq' diff --git a/llvm/test/Analysis/CostModel/X86/fcmp-latency.ll b/llvm/test/Analysis/CostModel/X86/fcmp-latency.ll index a3d8b51..a2b9564 100644 --- a/llvm/test/Analysis/CostModel/X86/fcmp-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/fcmp-latency.ll @@ -84,15 +84,15 @@ define i32 @cmp_float_oeq(i32 %arg) { define i32 @cmp_float_one(i32 %arg) { ; SSE-LABEL: 'cmp_float_one' ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fcmp one float undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = fcmp one <2 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fcmp one <4 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fcmp one <8 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fcmp one <16 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2F32 = fcmp one <2 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = fcmp one <4 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8F32 = fcmp one <8 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V16F32 = fcmp one <16 x float> undef, undef ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fcmp one double undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fcmp one <2 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = fcmp one <4 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = fcmp one <8 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F64 = fcmp one <16 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2F64 = fcmp one <2 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F64 = fcmp one <4 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8F64 = fcmp one <8 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16F64 = fcmp one <16 x double> undef, undef ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'cmp_float_one' @@ -492,15 +492,15 @@ define i32 @cmp_float_olt(i32 %arg) { define i32 @cmp_float_ueq(i32 %arg) { ; SSE-LABEL: 'cmp_float_ueq' ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fcmp ueq float undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fcmp ueq double undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'cmp_float_ueq' diff --git a/llvm/test/Analysis/CostModel/X86/fcmp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/fcmp-sizelatency.ll index e07a834..bada841 100644 --- a/llvm/test/Analysis/CostModel/X86/fcmp-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/fcmp-sizelatency.ll @@ -71,15 +71,15 @@ define i32 @cmp_float_oeq(i32 %arg) { define i32 @cmp_float_one(i32 %arg) { ; SSE-LABEL: 'cmp_float_one' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp one <2 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp one <4 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp one <8 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp one <16 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp one <2 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp one <4 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp one <8 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp one <16 x float> undef, undef ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp one <2 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp one <4 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp one <8 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp one <16 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp one <2 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp one <4 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp one <8 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp one <16 x double> undef, undef ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'cmp_float_one' @@ -401,15 +401,15 @@ define i32 @cmp_float_olt(i32 %arg) { define i32 @cmp_float_ueq(i32 %arg) { ; SSE-LABEL: 'cmp_float_ueq' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'cmp_float_ueq' diff --git a/llvm/test/Analysis/CostModel/X86/fcmp.ll b/llvm/test/Analysis/CostModel/X86/fcmp.ll index 2a9fbfb..5a42ebf 100644 --- a/llvm/test/Analysis/CostModel/X86/fcmp.ll +++ b/llvm/test/Analysis/CostModel/X86/fcmp.ll @@ -97,28 +97,28 @@ define i32 @cmp_float_oeq(i32 %arg) { define i32 @cmp_float_one(i32 %arg) { ; SSE2-LABEL: 'cmp_float_one' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp one <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp one <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp one <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp one <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = fcmp one <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fcmp one <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F32 = fcmp one <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F32 = fcmp one <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp one <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp one <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp one <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp one <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fcmp one <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = fcmp one <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = fcmp one <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16F64 = fcmp one <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_one' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp one <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp one <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp one <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp one <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp one <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp one <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp one <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp one <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp one <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp one <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp one <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp one <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp one <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp one <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp one <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp one <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'cmp_float_one' @@ -583,28 +583,28 @@ define i32 @cmp_float_olt(i32 %arg) { define i32 @cmp_float_ueq(i32 %arg) { ; SSE2-LABEL: 'cmp_float_ueq' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE41-LABEL: 'cmp_float_ueq' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef -; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'cmp_float_ueq' -- 2.7.4