}
static const CostKindTblEntry GLMCostTable[] = {
- { ISD::FDIV, MVT::f32, { 18 } }, // divss
- { ISD::FDIV, MVT::v4f32, { 35 } }, // divps
- { ISD::FDIV, MVT::f64, { 33 } }, // divsd
- { ISD::FDIV, MVT::v2f64, { 65 } }, // divpd
+ { ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } }, // divss
+ { ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } }, // divps
+ { ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } }, // divsd
+ { ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } }, // divpd
};
if (ST->useGLMDivSqrtCosts())
{ ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } }, // mulss
{ ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } }, // mulpd
{ ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } }, // mulps
- { ISD::FDIV, MVT::f32, { 17 } }, // divss
- { ISD::FDIV, MVT::v4f32, { 39 } }, // divps
- { ISD::FDIV, MVT::f64, { 32 } }, // divsd
- { ISD::FDIV, MVT::v2f64, { 69 } }, // divpd
+ { ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } }, // divss
+ { ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } }, // divps
+ { ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } }, // divsd
+ { ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } }, // divpd
{ ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } }, // addpd
{ ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } }, // subpd
// v2i64/v4i64 mul is custom lowered as a series of long:
{ ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/
{ ISD::FMUL, MVT::f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/
- { ISD::FDIV, MVT::f64, { 4 } }, // Skylake from http://www.agner.org/
- { ISD::FDIV, MVT::v2f64, { 4 } }, // Skylake from http://www.agner.org/
- { ISD::FDIV, MVT::v4f64, { 8 } }, // Skylake from http://www.agner.org/
- { ISD::FDIV, MVT::v8f64, { 16 } }, // Skylake from http://www.agner.org/
+ { ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } }, // Skylake from http://www.agner.org/
{ ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } }, // Skylake from http://www.agner.org/
{ ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/
{ ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/
{ ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/
- { ISD::FDIV, MVT::f32, { 3 } }, // Skylake from http://www.agner.org/
- { ISD::FDIV, MVT::v4f32, { 3 } }, // Skylake from http://www.agner.org/
- { ISD::FDIV, MVT::v8f32, { 5 } }, // Skylake from http://www.agner.org/
- { ISD::FDIV, MVT::v16f32, { 10 } }, // Skylake from http://www.agner.org/
+ { ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } }, // Skylake from http://www.agner.org/
};
if (ST->hasAVX512())
{ ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } }, // vmulpd
{ ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } }, // vmulps
- { ISD::FDIV, MVT::f32, { 7 } }, // Haswell from http://www.agner.org/
- { ISD::FDIV, MVT::v4f32, { 7 } }, // Haswell from http://www.agner.org/
- { ISD::FDIV, MVT::v8f32, { 14 } }, // Haswell from http://www.agner.org/
- { ISD::FDIV, MVT::f64, { 14 } }, // Haswell from http://www.agner.org/
- { ISD::FDIV, MVT::v2f64, { 14 } }, // Haswell from http://www.agner.org/
- { ISD::FDIV, MVT::v4f64, { 28 } }, // Haswell from http://www.agner.org/
+ { ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } }, // vdivss
+ { ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } }, // vdivps
+ { ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } }, // vdivps
+ { ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } }, // vdivsd
+ { ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } }, // vdivpd
+ { ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } }, // vdivpd
};
// Look for AVX2 lowering tricks for custom cases.
{ ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } }, // BTVER2 from http://www.agner.org/
{ ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } }, // BTVER2 from http://www.agner.org/
- { ISD::FDIV, MVT::f32, { 14 } }, // SNB from http://www.agner.org/
- { ISD::FDIV, MVT::v4f32, { 14 } }, // SNB from http://www.agner.org/
- { ISD::FDIV, MVT::v8f32, { 28 } }, // SNB from http://www.agner.org/
- { ISD::FDIV, MVT::f64, { 22 } }, // SNB from http://www.agner.org/
- { ISD::FDIV, MVT::v2f64, { 22 } }, // SNB from http://www.agner.org/
- { ISD::FDIV, MVT::v4f64, { 44 } }, // SNB from http://www.agner.org/
+ { ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } }, // SNB from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } }, // SNB from http://www.agner.org/
+ { ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } }, // SNB from http://www.agner.org/
+ { ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } }, // SNB from http://www.agner.org/
+ { ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } }, // SNB from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } }, // SNB from http://www.agner.org/
};
if (ST->hasAVX())
{ ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } }, // Nehalem from http://www.agner.org/
{ ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::f32, { 14 } }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::v4f32, { 14 } }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::f64, { 22 } }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::v2f64, { 22 } }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } }, // Nehalem from http://www.agner.org/
{ ISD::MUL, MVT::v2i64, { 6 } } // 3*pmuludq/3*shift/2*add
};
{ ISD::MUL, MVT::v4i32, { 6 } }, // 3*pmuludq/4*shuffle
{ ISD::MUL, MVT::v2i64, { 8 } }, // 3*pmuludq/3*shift/2*add
- { ISD::FDIV, MVT::f32, { 23 } }, // Pentium IV from http://www.agner.org/
- { ISD::FDIV, MVT::v4f32, { 39 } }, // Pentium IV from http://www.agner.org/
- { ISD::FDIV, MVT::f64, { 38 } }, // Pentium IV from http://www.agner.org/
- { ISD::FDIV, MVT::v2f64, { 69 } }, // Pentium IV from http://www.agner.org/
+ { ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } }, // Pentium IV from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } }, // Pentium IV from http://www.agner.org/
+ { ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } }, // Pentium IV from http://www.agner.org/
+ { ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } }, // Pentium IV from http://www.agner.org/
{ ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } }, // Pentium IV from http://www.agner.org/
{ ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } }, // Pentium IV from http://www.agner.org/
return LT.first * KindCost.value();
static const CostKindTblEntry SSE1CostTable[] = {
- { ISD::FDIV, MVT::f32, { 17 } }, // Pentium III from http://www.agner.org/
- { ISD::FDIV, MVT::v4f32, { 34 } }, // Pentium III from http://www.agner.org/
+ { ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } }, // Pentium III from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } }, // Pentium III from http://www.agner.org/
{ ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } }, // Pentium III from http://www.agner.org/
{ ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } }, // Pentium III from http://www.agner.org/
{ ISD::FADD, MVT::f64, { 2, 3, 1, 1 } }, // (x87)
{ ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } }, // (x87)
{ ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } }, // (x87)
- { ISD::FDIV, MVT::f64, { 38 } }, // (x87)
+ { ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } }, // (x87)
};
if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, LT.second))
return TLI->isOperationLegal(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, VT);
}
+/// Report whether speculatively executing \p I should be treated as
+/// expensive on X86.
+///
+/// Every FDiv instruction is reported as expensive; all other instructions
+/// are delegated to BaseT::isExpensiveToSpeculativelyExecute.
+bool X86TTIImpl::isExpensiveToSpeculativelyExecute(const Instruction *I) {
+  // FDIV is always expensive, even if it has a very low uop count.
+  // NOTE(review): presumably the base implementation derives its answer from
+  // the cost tables, where FDIV carries a small code-size/size-latency cost;
+  // confirm against BaseT before relaxing this override.
+  // TODO: Still necessary for recent CPUs with low latency/throughput fdiv?
+  if (I->getOpcode() == Instruction::FDiv)
+    return true;
+
+  return BaseT::isExpensiveToSpeculativelyExecute(I);
+}
+
bool X86TTIImpl::isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
return false;
}
}
define i32 @fdiv(i32 %arg) {
-; CHECK-LABEL: 'fdiv'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE1-LABEL: 'fdiv'
+; SSE1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %F32 = fdiv float undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %F64 = fdiv double undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE2-LABEL: 'fdiv'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %F32 = fdiv float undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %F64 = fdiv double undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'fdiv'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'fdiv'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef
+; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'fdiv'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %F32 = fdiv float undef, undef
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F64 = fdiv double undef, undef
+; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'fdiv'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %F32 = fdiv float undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = fdiv double undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SLM-LABEL: 'fdiv'
+; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = fdiv float undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = fdiv double undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; GLM-LABEL: 'fdiv'
+; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = fdiv float undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = fdiv double undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = fdiv float undef, undef
%V4F32 = fdiv <4 x float> undef, undef
}
define i32 @fdiv(i32 %arg) {
-; CHECK-LABEL: 'fdiv'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE1-LABEL: 'fdiv'
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE2-LABEL: 'fdiv'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'fdiv'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'fdiv'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'fdiv'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SLM-LABEL: 'fdiv'
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; GLM-LABEL: 'fdiv'
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = fdiv float undef, undef
%V4F32 = fdiv <4 x float> undef, undef