Update (mainly) vXf32/vXf64 -> vXi8/vXi16 fptosi/fptoui costs based on the worst case costs from the script in D103695.
Move to using legalized types wherever possible, which allows us to prune the cost tables.
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
- { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f64, 7 },
+ { ISD::FP_TO_SINT, MVT::v32i8, MVT::v32f64,15 },
+ { ISD::FP_TO_SINT, MVT::v64i8, MVT::v64f32,11 },
+ { ISD::FP_TO_SINT, MVT::v64i8, MVT::v64f64,31 },
{ ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f64, 7 },
+ { ISD::FP_TO_SINT, MVT::v32i16, MVT::v32f32, 5 },
+ { ISD::FP_TO_SINT, MVT::v32i16, MVT::v32f64,15 },
{ ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 1 },
- { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 3 },
- { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 3 },
{ ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f64, 3 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
- { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v2i64, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
{ ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 5 },
- { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 3 },
- { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v8f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v32i8, MVT::v32f32, 5 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 },
};
static const TypeConversionCostTblEntry AVX2ConversionTbl[] = {
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v8f32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 1 },
{ ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 1 },
{ ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 3 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 4 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 7 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v8f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 7 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 7 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 4 },
- { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 15 },
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v4f64, 7 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 2 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 10 },
- { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 },
- { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 },
- { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v8f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v32i8, MVT::v8f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v32i8, MVT::v4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v8f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v4f64, 2 },
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 2 },
- { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 },
{ ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 2 },
{ ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 5 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 5 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 9 },
- { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 5 },
- { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 },
- { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v8f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v32i8, MVT::v8f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v32i8, MVT::v4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v8f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 9 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 9 },
- { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 },
- { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 9 },
- { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 19 },
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v4f64, 9 },
{ ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 1 },
{ ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 1 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f32, 1 },
{ ISD::FP_TO_SINT, MVT::i32, MVT::f64, 1 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f64, 1 },
- { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 3 },
- { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v4f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v2f64, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f32, 1 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 5 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 3 },
- { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 3 },
- { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 5 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 6 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 3 },
};
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
{ ISD::FP_TO_SINT, MVT::i64, MVT::f32, 4 },
{ ISD::FP_TO_SINT, MVT::i32, MVT::f64, 4 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f64, 4 },
- { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 },
- { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 },
- { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
- { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
- { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
- { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 },
- { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v4f32, 6 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v2f64, 6 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v4f32, 5 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v2f64, 5 },
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v2f64, 4 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f32, 4 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f64, 4 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 15 },
- { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 },
- { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 },
- { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
- { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 2 },
- { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
- { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v4f32, 6 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v2f64, 6 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v4f32, 5 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v2f64, 5 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 8 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 8 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v16i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v16i8, 4 },
define i32 @fptosi_double_i32(i32 %arg) {
; SSE2-LABEL: 'fptosi_double_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = fptosi double undef to i32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_double_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fptosi_double_i32'
; SLM-LABEL: 'fptosi_double_i32'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I32 = fptosi double undef to i32
define i32 @fptosi_double_i16(i32 %arg) {
; SSE2-LABEL: 'fptosi_double_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptosi double undef to i16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_double_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fptosi_double_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fptosi_double_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX-LABEL: 'fptosi_double_i16'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptosi_double_i16'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_double_i16'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I16 = fptosi double undef to i16
define i32 @fptosi_double_i8(i32 %arg) {
; SSE2-LABEL: 'fptosi_double_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptosi double undef to i8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_double_i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fptosi_double_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fptosi_double_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX-LABEL: 'fptosi_double_i8'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptosi_double_i8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_double_i8'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I8 = fptosi double undef to i8
define i32 @fptosi_float_i32(i32 %arg) {
; SSE2-LABEL: 'fptosi_float_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = fptosi float undef to i32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_float_i32'
define i32 @fptosi_float_i16(i32 %arg) {
; SSE2-LABEL: 'fptosi_float_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptosi float undef to i16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_float_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fptosi_float_i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'fptosi_float_i16'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptosi_float_i16'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_float_i16'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I16 = fptosi float undef to i16
define i32 @fptosi_float_i8(i32 %arg) {
; SSE2-LABEL: 'fptosi_float_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptosi float undef to i8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_float_i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fptosi_float_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fptosi_float_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX-LABEL: 'fptosi_float_i8'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptosi_float_i8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_float_i8'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I8 = fptosi float undef to i8
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_double_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = fptoui double undef to i64
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'fptoui_double_i64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
-; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = fptoui double undef to i64
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'fptoui_double_i64'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_double_i64'
-; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
-; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = fptoui double undef to i64
+; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I64 = fptoui double undef to i64
define i32 @fptoui_double_i32(i32 %arg) {
; SSE2-LABEL: 'fptoui_double_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = fptoui double undef to i32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_double_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fptoui_double_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'fptoui_double_i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptoui_double_i32'
;
; SLM-LABEL: 'fptoui_double_i32'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
-; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I32 = fptoui double undef to i32
define i32 @fptoui_double_i16(i32 %arg) {
; SSE2-LABEL: 'fptoui_double_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptoui double undef to i16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_double_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fptoui_double_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fptoui_double_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX-LABEL: 'fptoui_double_i16'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptoui_double_i16'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_double_i16'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I16 = fptoui double undef to i16
define i32 @fptoui_double_i8(i32 %arg) {
; SSE2-LABEL: 'fptoui_double_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptoui double undef to i8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_double_i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fptoui_double_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fptoui_double_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX-LABEL: 'fptoui_double_i8'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptoui_double_i8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_double_i8'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I8 = fptoui double undef to i8
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_float_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = fptoui float undef to i64
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'fptoui_float_i64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = fptoui float undef to i64
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'fptoui_float_i64'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_float_i64'
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
-; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = fptoui float undef to i64
+; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I64 = fptoui float undef to i64
;
; SSE42-LABEL: 'fptoui_float_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fptoui_float_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_float_i32'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I32 = fptoui float undef to i32
define i32 @fptoui_float_i16(i32 %arg) {
; SSE2-LABEL: 'fptoui_float_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptoui float undef to i16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_float_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fptoui_float_i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'fptoui_float_i16'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptoui_float_i16'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_float_i16'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I16 = fptoui float undef to i16
define i32 @fptoui_float_i8(i32 %arg) {
; SSE2-LABEL: 'fptoui_float_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptoui float undef to i8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_float_i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fptoui_float_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fptoui_float_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX-LABEL: 'fptoui_float_i8'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptoui_float_i8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_float_i8'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I8 = fptoui float undef to i8
target triple = "x86_64-apple-macosx10.8.0"
-; CHECK: cost of 4 for VF 8 For instruction: %conv = fptosi float %tmp to i8
+; CHECK: cost of 1 for VF 1 For instruction: %conv = fptosi float %tmp to i8
define void @float_to_sint8_cost(i8* noalias nocapture %a, float* noalias nocapture readonly %b) nounwind {
entry:
br label %for.body
define void @fptoui_8f64_8i32() #0 {
; SSE-LABEL: @fptoui_8f64_8i32(
-; SSE-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; SSE-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; SSE-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; SSE-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; SSE-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
-; SSE-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i32
-; SSE-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i32
-; SSE-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i32
-; SSE-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i32
-; SSE-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i32
-; SSE-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i32
-; SSE-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i32
-; SSE-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i32
-; SSE-NEXT: store i32 [[CVT0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
-; SSE-NEXT: store i32 [[CVT1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
-; SSE-NEXT: store i32 [[CVT2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
-; SSE-NEXT: store i32 [[CVT3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
-; SSE-NEXT: store i32 [[CVT4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
-; SSE-NEXT: store i32 [[CVT5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
-; SSE-NEXT: store i32 [[CVT6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
-; SSE-NEXT: store i32 [[CVT7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
+; SSE-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; SSE-NEXT: [[TMP3:%.*]] = fptoui <4 x double> [[TMP1]] to <4 x i32>
+; SSE-NEXT: [[TMP4:%.*]] = fptoui <4 x double> [[TMP2]] to <4 x i32>
+; SSE-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @dst32 to <4 x i32>*), align 4
+; SSE-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 4
; SSE-NEXT: ret void
;
; AVX1-LABEL: @fptoui_8f64_8i32(