From 11765b77be84d793ebedc5b5436c463490746131 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Sep 2022 16:55:12 +0100 Subject: [PATCH] [CostModel][X86] Add CostKinds handling for fmul ops This was achieved with an updated version of the 'cost-tables vs llvm-mca' script D103695 As we're using 'typical' worst case values, not all cost entries come from a single CPU - e.g. the latency/throughput from haswell but the size-latency(uops) from zen1/alderlake-e due to 'double pumping' --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 55 ++++++++++---- .../Analysis/CostModel/X86/arith-fp-latency.ll | 86 +++++++++++++++++++--- .../Analysis/CostModel/X86/arith-fp-sizelatency.ll | 86 +++++++++++++++++++--- .../Analysis/CostModel/X86/intrinsic-cost-kinds.ll | 8 +- 4 files changed, 195 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 359e710..e3931ea 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -343,9 +343,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( static const CostKindTblEntry SLMCostTable[] = { { ISD::MUL, MVT::v4i32, { 11 } }, // pmulld { ISD::MUL, MVT::v8i16, { 2 } }, // pmullw - { ISD::FMUL, MVT::f64, { 2 } }, // mulsd - { ISD::FMUL, MVT::v2f64, { 4 } }, // mulpd - { ISD::FMUL, MVT::v4f32, { 2 } }, // mulps + { ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } }, // mulsd + { ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } }, // mulss + { ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } }, // mulpd + { ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } }, // mulps { ISD::FDIV, MVT::f32, { 17 } }, // divss { ISD::FDIV, MVT::v4f32, { 39 } }, // divps { ISD::FDIV, MVT::f64, { 32 } }, // divsd @@ -711,7 +712,11 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ { ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ { ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ - { ISD::FMUL, MVT::v8f64, { 1 } }, // Skylake from http://www.agner.org/ + { ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ + { ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ + { ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ + { ISD::FMUL, MVT::f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ + { ISD::FDIV, MVT::f64, { 4 } }, // Skylake from http://www.agner.org/ { ISD::FDIV, MVT::v2f64, { 4 } }, // Skylake from http://www.agner.org/ { ISD::FDIV, MVT::v4f64, { 8 } }, // Skylake from http://www.agner.org/ @@ -722,7 +727,11 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ { ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ { ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ - { ISD::FMUL, MVT::v16f32, { 1 } }, // Skylake from http://www.agner.org/ + { ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ + { ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ + { ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ + { ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/ + { ISD::FDIV, MVT::f32, { 3 } }, // Skylake from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, { 3 } }, // Skylake from http://www.agner.org/ { ISD::FDIV, MVT::v8f32, { 5 } }, // Skylake from http://www.agner.org/ @@ -908,10 +917,12 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } }, // vsubpd { ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } }, // vsubps - { ISD::FMUL, MVT::f64, { 1 } }, // Haswell from http://www.agner.org/ - { ISD::FMUL, MVT::v2f64, { 1 } }, // Haswell from http://www.agner.org/ - { ISD::FMUL, MVT::v4f64, { 1 } }, // Haswell from http://www.agner.org/ - { ISD::FMUL, MVT::v8f32, { 1 } }, // Haswell from http://www.agner.org/ + { ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } }, // vmulsd + { ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } }, // vmulss + { ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } }, // vmulpd + { ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } }, // vmulps + { ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } }, // vmulpd + { ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } }, // vmulps { ISD::FDIV, MVT::f32, { 7 } }, // Haswell from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, { 7 } }, // Haswell from http://www.agner.org/ @@ -998,9 +1009,12 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } }, // BDVER2 from http://www.agner.org/ { ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } }, // BDVER2 from http://www.agner.org/ - { ISD::FMUL, MVT::f64, { 2 } }, // BTVER2 from http://www.agner.org/ - { ISD::FMUL, MVT::v2f64, { 2 } }, // BTVER2 from http://www.agner.org/ - { ISD::FMUL, MVT::v4f64, { 4 } }, // BTVER2 from http://www.agner.org/ + { ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } }, // BTVER2 from http://www.agner.org/ + { ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } }, // BTVER2 from http://www.agner.org/ + { ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } }, // BTVER2 from http://www.agner.org/ + { ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } }, // BTVER2 from http://www.agner.org/ + { ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } }, // BTVER2 from http://www.agner.org/ + { ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } }, // BTVER2 from http://www.agner.org/ { ISD::FDIV, MVT::f32, { 14 } }, // SNB from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, { 14 } }, // SNB from http://www.agner.org/ @@ -1026,10 +1040,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } }, // Nehalem from http://www.agner.org/ { ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } }, // Nehalem from http://www.agner.org/ - { ISD::FMUL, MVT::f64, { 1 } }, // Nehalem from http://www.agner.org/ - { ISD::FMUL, MVT::f32, { 1 } }, // Nehalem from http://www.agner.org/ - { ISD::FMUL, MVT::v2f64, { 1 } }, // Nehalem from http://www.agner.org/ - { ISD::FMUL, MVT::v4f32, { 1 } }, // Nehalem from http://www.agner.org/ + { ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } }, // Nehalem from http://www.agner.org/ + { ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } }, // Nehalem from http://www.agner.org/ + { ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } }, // Nehalem from http://www.agner.org/ + { ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } }, // Nehalem from http://www.agner.org/ { ISD::FDIV, MVT::f32, { 14 } }, // Nehalem from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, { 14 } }, // Nehalem from http://www.agner.org/ @@ -1119,6 +1133,9 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } }, // Pentium IV from http://www.agner.org/ { ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } }, // Pentium IV from http://www.agner.org/ { ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } }, // Pentium IV from http://www.agner.org/ + + { ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } }, // Pentium IV from http://www.agner.org/ + { ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } }, // Pentium IV from http://www.agner.org/ }; if (ST->hasSSE2()) @@ -1138,6 +1155,9 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } }, // Pentium III from http://www.agner.org/ { ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } }, // Pentium III from http://www.agner.org/ + + { ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } }, // Pentium III from http://www.agner.org/ + { ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } }, // Pentium III from http://www.agner.org/ }; if (ST->hasSSE1()) @@ -1173,6 +1193,9 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } }, // (x87) { ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } }, // (x87) + + { ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } }, // (x87) + { ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } }, // (x87) }; if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, LT.second)) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll index 3161649..4e47a66 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll @@ -420,16 +420,82 @@ define i32 @fneg(i32 %arg) { } define i32 @fmul(i32 %arg) { -; CHECK-LABEL: 'fmul' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fmul float undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fmul <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fmul <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fmul <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fmul double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fmul <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fmul <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fmul <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; SSE1-LABEL: 'fmul' +; SSE1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fmul float undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fmul <4 x float> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fmul <8 x float> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fmul <16 x float> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fmul double undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = fmul <2 x double> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = fmul <4 x double> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8F64 = fmul <8 x double> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fmul' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fmul float undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fmul <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fmul <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fmul <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fmul double undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fmul <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = fmul <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = fmul <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fmul' +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fmul float undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fmul <4 x float> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fmul <8 x float> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fmul <16 x float> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fmul double undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fmul <2 x double> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = fmul <4 x double> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = fmul <8 x double> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX-LABEL: 'fmul' +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fmul float undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fmul <4 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fmul <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = fmul <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fmul double undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fmul <2 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fmul <4 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = fmul <8 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fmul' +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fmul float undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fmul <4 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fmul double undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fmul <2 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fmul' +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fmul float undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fmul <4 x float> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fmul <8 x float> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fmul <16 x float> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fmul double undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = fmul <2 x double> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = fmul <4 x double> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = fmul <8 x double> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fmul' +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fmul float undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fmul <4 x float> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fmul <8 x float> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fmul <16 x float> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fmul double undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fmul <2 x double> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = fmul <4 x double> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = fmul <8 x double> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = fmul float undef, undef %V4F32 = fmul <4 x float> undef, undef diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll index ca3074c..96c918b 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll @@ -376,16 +376,82 @@ define i32 @fneg(i32 %arg) { } define i32 @fmul(i32 %arg) { -; CHECK-LABEL: 'fmul' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fmul <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fmul <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fmul <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fmul <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; SSE1-LABEL: 'fmul' +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <2 x double> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fmul <8 x double> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE2-LABEL: 'fmul' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE42-LABEL: 'fmul' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX-LABEL: 'fmul' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512-LABEL: 'fmul' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fmul <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fmul <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fmul <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fmul <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'fmul' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GLM-LABEL: 'fmul' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = fmul float undef, undef %V4F32 = fmul <4 x float> undef, undef diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll index ef7c4fa..01b1b7f 100644 --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -133,8 +133,8 @@ define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fmuladd' -; LATE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) -; LATE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) +; LATE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fmuladd' @@ -144,7 +144,7 @@ define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float ; ; SIZE_LATE-LABEL: 'fmuladd' ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) @@ -377,7 +377,7 @@ define void @reduce_fmul(<16 x float> %va) { ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fmul' -; LATE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul' -- 2.7.4