[CostModel][X86] Add CostKinds handling for fdiv ops

author Simon Pilgrim <llvm-dev@redking.me.uk>

Sat, 3 Sep 2022 14:48:33 +0000 (15:48 +0100)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Sat, 3 Sep 2022 14:48:39 +0000 (15:48 +0100)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 3 Sep 2022 14:48:33 +0000 (15:48 +0100)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 3 Sep 2022 14:48:39 +0000 (15:48 +0100)
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp

index a06a42e..d21e9e6 100644 (file)
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -329,10 +329,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
    }
  
    static const CostKindTblEntry GLMCostTable[] = {
-    { ISD::FDIV,  MVT::f32,   { 18 } }, // divss
-    { ISD::FDIV,  MVT::v4f32, { 35 } }, // divps
-    { ISD::FDIV,  MVT::f64,   { 33 } }, // divsd
-    { ISD::FDIV,  MVT::v2f64, { 65 } }, // divpd
+    { ISD::FDIV,  MVT::f32,   { 18, 19, 1, 1 } }, // divss
+    { ISD::FDIV,  MVT::v4f32, { 35, 36, 1, 1 } }, // divps
+    { ISD::FDIV,  MVT::f64,   { 33, 34, 1, 1 } }, // divsd
+    { ISD::FDIV,  MVT::v2f64, { 65, 66, 1, 1 } }, // divpd
    };
  
    if (ST->useGLMDivSqrtCosts())
@@ -347,10 +347,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
      { ISD::FMUL,  MVT::f32,   {  1,  4, 1, 1 } }, // mulss
      { ISD::FMUL,  MVT::v2f64, {  4,  7, 1, 1 } }, // mulpd
      { ISD::FMUL,  MVT::v4f32, {  2,  5, 1, 1 } }, // mulps
-    { ISD::FDIV,  MVT::f32,   { 17 } }, // divss
-    { ISD::FDIV,  MVT::v4f32, { 39 } }, // divps
-    { ISD::FDIV,  MVT::f64,   { 32 } }, // divsd
-    { ISD::FDIV,  MVT::v2f64, { 69 } }, // divpd
+    { ISD::FDIV,  MVT::f32,   { 17, 19, 1, 1 } }, // divss
+    { ISD::FDIV,  MVT::v4f32, { 39, 39, 1, 6 } }, // divps
+    { ISD::FDIV,  MVT::f64,   { 32, 34, 1, 1 } }, // divsd
+    { ISD::FDIV,  MVT::v2f64, { 69, 69, 1, 6 } }, // divpd
      { ISD::FADD,  MVT::v2f64, {  2,  4, 1, 1 } }, // addpd
      { ISD::FSUB,  MVT::v2f64, {  2,  4, 1, 1 } }, // subpd
      // v2i64/v4i64 mul is custom lowered as a series of long:
@@ -717,10 +717,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
      { ISD::FMUL,    MVT::v2f64,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
      { ISD::FMUL,    MVT::f64,     {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
  
-    { ISD::FDIV,    MVT::f64,     {  4 } }, // Skylake from http://www.agner.org/
-    { ISD::FDIV,    MVT::v2f64,   {  4 } }, // Skylake from http://www.agner.org/
-    { ISD::FDIV,    MVT::v4f64,   {  8 } }, // Skylake from http://www.agner.org/
-    { ISD::FDIV,    MVT::v8f64,   { 16 } }, // Skylake from http://www.agner.org/
+    { ISD::FDIV,    MVT::f64,     {  4, 14, 1, 1 } }, // Skylake from http://www.agner.org/
+    { ISD::FDIV,    MVT::v2f64,   {  4, 14, 1, 1 } }, // Skylake from http://www.agner.org/
+    { ISD::FDIV,    MVT::v4f64,   {  8, 14, 1, 1 } }, // Skylake from http://www.agner.org/
+    { ISD::FDIV,    MVT::v8f64,   { 16, 23, 1, 3 } }, // Skylake from http://www.agner.org/
  
      { ISD::FNEG,    MVT::v16f32,  {  1,  1, 1, 2 } }, // Skylake from http://www.agner.org/
      { ISD::FADD,    MVT::v16f32,  {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
@@ -732,10 +732,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
      { ISD::FMUL,    MVT::v4f32,   {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
      { ISD::FMUL,    MVT::f32,     {  1,  4, 1, 1 } }, // Skylake from http://www.agner.org/
  
-    { ISD::FDIV,    MVT::f32,     {  3 } }, // Skylake from http://www.agner.org/
-    { ISD::FDIV,    MVT::v4f32,   {  3 } }, // Skylake from http://www.agner.org/
-    { ISD::FDIV,    MVT::v8f32,   {  5 } }, // Skylake from http://www.agner.org/
-    { ISD::FDIV,    MVT::v16f32,  { 10 } }, // Skylake from http://www.agner.org/
+    { ISD::FDIV,    MVT::f32,     {  3, 11, 1, 1 } }, // Skylake from http://www.agner.org/
+    { ISD::FDIV,    MVT::v4f32,   {  3, 11, 1, 1 } }, // Skylake from http://www.agner.org/
+    { ISD::FDIV,    MVT::v8f32,   {  5, 11, 1, 1 } }, // Skylake from http://www.agner.org/
+    { ISD::FDIV,    MVT::v16f32,  { 10, 18, 1, 3 } }, // Skylake from http://www.agner.org/
    };
  
    if (ST->hasAVX512())
@@ -924,12 +924,12 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
      { ISD::FMUL, MVT::v4f64,   {  1,  5, 1, 2 } }, // vmulpd
      { ISD::FMUL, MVT::v8f32,   {  1,  5, 1, 2 } }, // vmulps
  
-    { ISD::FDIV, MVT::f32,     {  7 } }, // Haswell from http://www.agner.org/
-    { ISD::FDIV, MVT::v4f32,   {  7 } }, // Haswell from http://www.agner.org/
-    { ISD::FDIV, MVT::v8f32,   { 14 } }, // Haswell from http://www.agner.org/
-    { ISD::FDIV, MVT::f64,     { 14 } }, // Haswell from http://www.agner.org/
-    { ISD::FDIV, MVT::v2f64,   { 14 } }, // Haswell from http://www.agner.org/
-    { ISD::FDIV, MVT::v4f64,   { 28 } }, // Haswell from http://www.agner.org/
+    { ISD::FDIV, MVT::f32,     {  7, 13, 1, 1 } }, // vdivss
+    { ISD::FDIV, MVT::v4f32,   {  7, 13, 1, 1 } }, // vdivps
+    { ISD::FDIV, MVT::v8f32,   { 14, 21, 1, 3 } }, // vdivps
+    { ISD::FDIV, MVT::f64,     { 14, 20, 1, 1 } }, // vdivsd
+    { ISD::FDIV, MVT::v2f64,   { 14, 20, 1, 1 } }, // vdivpd
+    { ISD::FDIV, MVT::v4f64,   { 28, 35, 1, 3 } }, // vdivpd
    };
  
    // Look for AVX2 lowering tricks for custom cases.
@@ -1016,12 +1016,12 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
      { ISD::FMUL,    MVT::v4f64,   {  4,  5, 1, 2 } }, // BTVER2 from http://www.agner.org/
      { ISD::FMUL,    MVT::v8f32,   {  2,  5, 1, 2 } }, // BTVER2 from http://www.agner.org/
  
-    { ISD::FDIV,    MVT::f32,     { 14 } }, // SNB from http://www.agner.org/
-    { ISD::FDIV,    MVT::v4f32,   { 14 } }, // SNB from http://www.agner.org/
-    { ISD::FDIV,    MVT::v8f32,   { 28 } }, // SNB from http://www.agner.org/
-    { ISD::FDIV,    MVT::f64,     { 22 } }, // SNB from http://www.agner.org/
-    { ISD::FDIV,    MVT::v2f64,   { 22 } }, // SNB from http://www.agner.org/
-    { ISD::FDIV,    MVT::v4f64,   { 44 } }, // SNB from http://www.agner.org/
+    { ISD::FDIV,    MVT::f32,     { 14, 14, 1, 1 } }, // SNB from http://www.agner.org/
+    { ISD::FDIV,    MVT::v4f32,   { 14, 14, 1, 1 } }, // SNB from http://www.agner.org/
+    { ISD::FDIV,    MVT::v8f32,   { 28, 29, 1, 3 } }, // SNB from http://www.agner.org/
+    { ISD::FDIV,    MVT::f64,     { 22, 22, 1, 1 } }, // SNB from http://www.agner.org/
+    { ISD::FDIV,    MVT::v2f64,   { 22, 22, 1, 1 } }, // SNB from http://www.agner.org/
+    { ISD::FDIV,    MVT::v4f64,   { 44, 45, 1, 3 } }, // SNB from http://www.agner.org/
    };
  
    if (ST->hasAVX())
@@ -1045,10 +1045,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
      { ISD::FMUL, MVT::v2f64,  {  1,  5, 1, 1 } }, // Nehalem from http://www.agner.org/
      { ISD::FMUL, MVT::v4f32,  {  1,  5, 1, 1 } }, // Nehalem from http://www.agner.org/
  
-    { ISD::FDIV,  MVT::f32,   { 14 } }, // Nehalem from http://www.agner.org/
-    { ISD::FDIV,  MVT::v4f32, { 14 } }, // Nehalem from http://www.agner.org/
-    { ISD::FDIV,  MVT::f64,   { 22 } }, // Nehalem from http://www.agner.org/
-    { ISD::FDIV,  MVT::v2f64, { 22 } }, // Nehalem from http://www.agner.org/
+    { ISD::FDIV,  MVT::f32,   { 14, 14, 1, 1 } }, // Nehalem from http://www.agner.org/
+    { ISD::FDIV,  MVT::v4f32, { 14, 14, 1, 1 } }, // Nehalem from http://www.agner.org/
+    { ISD::FDIV,  MVT::f64,   { 22, 22, 1, 1 } }, // Nehalem from http://www.agner.org/
+    { ISD::FDIV,  MVT::v2f64, { 22, 22, 1, 1 } }, // Nehalem from http://www.agner.org/
  
      { ISD::MUL,   MVT::v2i64, {  6 } }  // 3*pmuludq/3*shift/2*add
    };
@@ -1116,10 +1116,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
      { ISD::MUL,  MVT::v4i32,  {  6 } }, // 3*pmuludq/4*shuffle
      { ISD::MUL,  MVT::v2i64,  {  8 } }, // 3*pmuludq/3*shift/2*add
  
-    { ISD::FDIV, MVT::f32,    { 23 } }, // Pentium IV from http://www.agner.org/
-    { ISD::FDIV, MVT::v4f32,  { 39 } }, // Pentium IV from http://www.agner.org/
-    { ISD::FDIV, MVT::f64,    { 38 } }, // Pentium IV from http://www.agner.org/
-    { ISD::FDIV, MVT::v2f64,  { 69 } }, // Pentium IV from http://www.agner.org/
+    { ISD::FDIV, MVT::f32,    { 23, 23, 1, 1 } }, // Pentium IV from http://www.agner.org/
+    { ISD::FDIV, MVT::v4f32,  { 39, 39, 1, 1 } }, // Pentium IV from http://www.agner.org/
+    { ISD::FDIV, MVT::f64,    { 38, 38, 1, 1 } }, // Pentium IV from http://www.agner.org/
+    { ISD::FDIV, MVT::v2f64,  { 69, 69, 1, 1 } }, // Pentium IV from http://www.agner.org/
  
      { ISD::FNEG, MVT::f32,    {  1,  1, 1, 1 } }, // Pentium IV from http://www.agner.org/
      { ISD::FNEG, MVT::f64,    {  1,  1, 1, 1 } }, // Pentium IV from http://www.agner.org/
@@ -1144,8 +1144,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
          return LT.first * KindCost.value();
  
    static const CostKindTblEntry SSE1CostTable[] = {
-    { ISD::FDIV, MVT::f32,   { 17 } }, // Pentium III from http://www.agner.org/
-    { ISD::FDIV, MVT::v4f32, { 34 } }, // Pentium III from http://www.agner.org/
+    { ISD::FDIV, MVT::f32,   { 17, 18, 1, 1 } }, // Pentium III from http://www.agner.org/
+    { ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } }, // Pentium III from http://www.agner.org/
  
      { ISD::FNEG, MVT::f32,   {  2,  2, 1, 2 } }, // Pentium III from http://www.agner.org/
      { ISD::FNEG, MVT::v4f32, {  2,  2, 1, 2 } }, // Pentium III from http://www.agner.org/
@@ -1189,7 +1189,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
      { ISD::FADD, MVT::f64, {  2,  3, 1, 1 } }, // (x87)
      { ISD::FSUB, MVT::f64, {  2,  3, 1, 1 } }, // (x87)
      { ISD::FMUL, MVT::f64, {  2,  5, 1, 1 } }, // (x87)
-    { ISD::FDIV, MVT::f64, { 38 } }, // (x87)
+    { ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } }, // (x87)
    };
  
    if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, LT.second))
@@ -5649,6 +5649,15 @@ bool X86TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
    return TLI->isOperationLegal(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, VT);
  }
  
+bool X86TTIImpl::isExpensiveToSpeculativelyExecute(const Instruction *I) {
+  // FDIV is always expensive to speculate, even if its uop count is very low.
+  // TODO: Still necessary for recent CPUs with low latency/throughput fdiv?
+  if (I->getOpcode() == Instruction::FDiv)
+    return true;
+
+  return BaseT::isExpensiveToSpeculativelyExecute(I);
+}
+
  bool X86TTIImpl::isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
    return false;
  }
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h

index 7fcfc5e..f74433b 100644 (file)
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -254,6 +254,7 @@ public:
    bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                         const SmallBitVector &OpcodeMask) const;
    bool hasDivRemOp(Type *DataType, bool IsSigned);
+  bool isExpensiveToSpeculativelyExecute(const Instruction *I);
    bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
    bool areInlineCompatible(const Function *Caller,
                             const Function *Callee) const;
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll

index 4e47a66..d6725f4 100644 (file)
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
@@ -511,16 +511,93 @@ define i32 @fmul(i32 %arg) {
  }
  
  define i32 @fdiv(i32 %arg) {
-; CHECK-LABEL: 'fdiv'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE1-LABEL: 'fdiv'
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %F32 = fdiv float undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %F64 = fdiv double undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 304 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE2-LABEL: 'fdiv'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %F32 = fdiv float undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %F64 = fdiv double undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 138 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 276 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'fdiv'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'fdiv'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'fdiv'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %F32 = fdiv float undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %F64 = fdiv double undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'fdiv'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %F32 = fdiv float undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F64 = fdiv double undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SLM-LABEL: 'fdiv'
+; SLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %F32 = fdiv float undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %F64 = fdiv double undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 138 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 276 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; GLM-LABEL: 'fdiv'
+; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %F32 = fdiv float undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %F64 = fdiv double undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  ;
    %F32 = fdiv float undef, undef
    %V4F32 = fdiv <4 x float> undef, undef
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll

index 96c918b..7783cd9 100644 (file)
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
@@ -467,16 +467,82 @@ define i32 @fmul(i32 %arg) {
  }
  
  define i32 @fdiv(i32 %arg) {
-; CHECK-LABEL: 'fdiv'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE1-LABEL: 'fdiv'
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE2-LABEL: 'fdiv'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'fdiv'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'fdiv'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'fdiv'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SLM-LABEL: 'fdiv'
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; GLM-LABEL: 'fdiv'
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  ;
    %F32 = fdiv float undef, undef
    %V4F32 = fdiv <4 x float> undef, undef
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sat, 3 Sep 2022 14:48:33 +0000 (15:48 +0100)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sat, 3 Sep 2022 14:48:39 +0000 (15:48 +0100)
llvm/lib/Target/X86/X86TargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86TargetTransformInfo.h		patch \| blob \| history
llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll		patch \| blob \| history
llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll		patch \| blob \| history