These require special handling to account for their expansion in lowering.
I'm trying very hard not to have to add predicate-specific costs, but it might be inevitable.
Pred == CmpInst::BAD_FCMP_PREDICATE))
Pred = cast<CmpInst>(I)->getPredicate();
- // TODO: Handle pre-AVX FCMP_ONE/FCMP_UEQ slow cases.
switch (Pred) {
case CmpInst::Predicate::ICMP_NE:
// xor(cmpeq(x,y),-1)
ExtraCost = 3;
}
break;
+ case CmpInst::Predicate::FCMP_ONE:
+ case CmpInst::Predicate::FCMP_UEQ:
+ // Without AVX we need to expand FCMP_ONE/FCMP_UEQ cases.
+ // Use FCMP_UEQ expansion - FCMP_ONE should be the same.
+ if (CondTy && !ST->hasAVX())
+ return getCmpSelInstrCost(Opcode, ValTy, CondTy,
+ CmpInst::Predicate::FCMP_UNO, CostKind) +
+ getCmpSelInstrCost(Opcode, ValTy, CondTy,
+ CmpInst::Predicate::FCMP_OEQ, CostKind) +
+ getArithmeticInstrCost(Instruction::Or, CondTy, CostKind);
+
+ break;
case CmpInst::Predicate::BAD_ICMP_PREDICATE:
case CmpInst::Predicate::BAD_FCMP_PREDICATE:
// Assume worst case scenario and add the maximum extra cost.
define i32 @cmp_float_one(i32 %arg) {
; SSE-LABEL: 'cmp_float_one'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX-LABEL: 'cmp_float_one'
define i32 @cmp_float_ueq(i32 %arg) {
; SSE-LABEL: 'cmp_float_ueq'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX-LABEL: 'cmp_float_ueq'
define i32 @cmp_float_one(i32 %arg) {
; SSE-LABEL: 'cmp_float_one'
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fcmp one float undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fcmp one double undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX1-LABEL: 'cmp_float_one'
define i32 @cmp_float_ueq(i32 %arg) {
; SSE-LABEL: 'cmp_float_ueq'
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fcmp ueq float undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fcmp ueq double undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX1-LABEL: 'cmp_float_ueq'
define i32 @cmp_float_one(i32 %arg) {
; SSE-LABEL: 'cmp_float_one'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX-LABEL: 'cmp_float_one'
define i32 @cmp_float_ueq(i32 %arg) {
; SSE-LABEL: 'cmp_float_ueq'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
+; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX-LABEL: 'cmp_float_ueq'
define i32 @cmp_float_one(i32 %arg) {
; SSE2-LABEL: 'cmp_float_one'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'cmp_float_one'
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'cmp_float_one'
define i32 @cmp_float_ueq(i32 %arg) {
; SSE2-LABEL: 'cmp_float_ueq'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'cmp_float_ueq'
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
+; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'cmp_float_ueq'