Use a modified version of the D103695 script to determine more accurate throughput/latency/codesize/size-latency cost estimates
{ ISD::BITREVERSE, MVT::v16i8, { 2, 5, 9, 9 } },
{ ISD::BITREVERSE, MVT::v32i8, { 2, 5, 9, 9 } },
{ ISD::BITREVERSE, MVT::v64i8, { 2, 5, 9, 12 } },
- { ISD::BSWAP, MVT::v8i64, { 1 } },
- { ISD::BSWAP, MVT::v16i32, { 1 } },
- { ISD::BSWAP, MVT::v32i16, { 1 } },
+ { ISD::BSWAP, MVT::v2i64, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v4i64, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i64, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v4i32, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i32, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v16i32, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i16, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v16i16, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v32i16, { 1, 1, 1, 2 } },
{ ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
{ ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
{ ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
{ ISD::BITREVERSE, MVT::v16i32, { 9, 13, 20, 20 } },
{ ISD::BITREVERSE, MVT::v32i16, { 9, 13, 20, 20 } },
{ ISD::BITREVERSE, MVT::v64i8, { 6, 11, 17, 17 } },
- { ISD::BSWAP, MVT::v8i64, { 4 } },
- { ISD::BSWAP, MVT::v16i32, { 4 } },
- { ISD::BSWAP, MVT::v32i16, { 4 } },
+ { ISD::BSWAP, MVT::v8i64, { 4, 7, 5, 5 } },
+ { ISD::BSWAP, MVT::v16i32, { 4, 7, 5, 5 } },
+ { ISD::BSWAP, MVT::v32i16, { 4, 7, 5, 5 } },
{ ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
{ ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
{ ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
{ ISD::BITREVERSE, MVT::v16i16, { 5, 11, 10, 17 } },
{ ISD::BITREVERSE, MVT::v16i8, { 3, 6, 9, 9 } },
{ ISD::BITREVERSE, MVT::v32i8, { 4, 5, 9, 15 } },
- { ISD::BSWAP, MVT::v4i64, { 1 } },
- { ISD::BSWAP, MVT::v8i32, { 1 } },
- { ISD::BSWAP, MVT::v16i16, { 1 } },
+ { ISD::BSWAP, MVT::v2i64, { 1, 2, 1, 2 } },
+ { ISD::BSWAP, MVT::v4i64, { 1, 3, 1, 2 } },
+ { ISD::BSWAP, MVT::v4i32, { 1, 2, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i32, { 1, 3, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i16, { 1, 2, 1, 2 } },
+ { ISD::BSWAP, MVT::v16i16, { 1, 3, 1, 2 } },
{ ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
{ ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
{ ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
{ ISD::BITREVERSE, MVT::v8i16, { 8, 13, 10, 16 } },
{ ISD::BITREVERSE, MVT::v32i8, { 13, 15, 17, 26 } }, // 2 x 128-bit Op + extract/insert
{ ISD::BITREVERSE, MVT::v16i8, { 7, 7, 9, 13 } },
- { ISD::BSWAP, MVT::v4i64, { 4 } },
- { ISD::BSWAP, MVT::v8i32, { 4 } },
- { ISD::BSWAP, MVT::v16i16, { 4 } },
+ { ISD::BSWAP, MVT::v4i64, { 5, 7, 5, 10 } },
+ { ISD::BSWAP, MVT::v2i64, { 2, 3, 1, 3 } },
+ { ISD::BSWAP, MVT::v8i32, { 5, 7, 5, 10 } },
+ { ISD::BSWAP, MVT::v4i32, { 2, 3, 1, 3 } },
+ { ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
+ { ISD::BSWAP, MVT::v8i16, { 2, 2, 1, 3 } },
{ ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
{ ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } }, // 2 x 128-bit Op + extract/insert
{ ISD::BITREVERSE, MVT::v4i32, { 16, 20, 11, 21 } },
{ ISD::BITREVERSE, MVT::v8i16, { 16, 20, 11, 21 } },
{ ISD::BITREVERSE, MVT::v16i8, { 11, 12, 10, 16 } },
- { ISD::BSWAP, MVT::v2i64, { 1 } },
- { ISD::BSWAP, MVT::v4i32, { 1 } },
- { ISD::BSWAP, MVT::v8i16, { 1 } },
+ { ISD::BSWAP, MVT::v2i64, { 5, 5, 1, 5 } },
+ { ISD::BSWAP, MVT::v4i32, { 5, 5, 1, 5 } },
+ { ISD::BSWAP, MVT::v8i16, { 5, 5, 1, 5 } },
{ ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
{ ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
{ ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
{ ISD::BITREVERSE, MVT::v4i32, { 16, 20, 30, 30 } },
{ ISD::BITREVERSE, MVT::v8i16, { 16, 20, 25, 25 } },
{ ISD::BITREVERSE, MVT::v16i8, { 11, 12, 21, 21 } },
- { ISD::BSWAP, MVT::v2i64, { 7 } },
- { ISD::BSWAP, MVT::v4i32, { 7 } },
- { ISD::BSWAP, MVT::v8i16, { 7 } },
+ { ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
+ { ISD::BSWAP, MVT::v4i32, { 5, 5, 9, 9 } },
+ { ISD::BSWAP, MVT::v8i16, { 5, 5, 4, 5 } },
{ ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
{ ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
{ ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
static const CostKindTblEntry X64CostTbl[] = { // 64-bit targets
{ ISD::ABS, MVT::i64, { 1, 2, 3, 4 } }, // SUB+CMOV
{ ISD::BITREVERSE, MVT::i64, { 10, 12, 20, 22 } },
- { ISD::BSWAP, MVT::i64, { 1 } },
+ { ISD::BSWAP, MVT::i64, { 1, 2, 1, 2 } },
{ ISD::CTLZ, MVT::i64, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ_ZERO_UNDEF, MVT::i64,{ 1, 1, 1, 1 } }, // BSR+XOR
{ ISD::CTTZ, MVT::i64, { 3 } }, // TEST+BSF+CMOV/BRANCH
{ ISD::BITREVERSE, MVT::i32, { 9, 12, 17, 19 } },
{ ISD::BITREVERSE, MVT::i16, { 9, 12, 17, 19 } },
{ ISD::BITREVERSE, MVT::i8, { 7, 9, 13, 14 } },
- { ISD::BSWAP, MVT::i32, { 1 } },
- { ISD::BSWAP, MVT::i16, { 1 } }, // ROL
+ { ISD::BSWAP, MVT::i32, { 1, 1, 1, 1 } },
+ { ISD::BSWAP, MVT::i16, { 1, 2, 1, 2 } }, // ROL
{ ISD::CTLZ, MVT::i32, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i16, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i8, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
; SSE2-LABEL: 'cost_bswap_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i64'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i64'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i64'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i64'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I64 = call i64 @llvm.bswap.i64(i64 %a64)
define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
; SSE2-LABEL: 'cost_bswap_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i32'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i32'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i32'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i32'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I32 = call i32 @llvm.bswap.i32(i32 %a32)
define void @cost_bswap_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
; SSE2-LABEL: 'cost_bswap_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i16'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i16'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i16'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i16'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i16'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I16 = call i16 @llvm.bswap.i16(i16 %a16)
define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
; SSE2-LABEL: 'cost_bswap_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i64'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i64'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i64'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I64 = call i64 @llvm.bswap.i64(i64 %a64)
define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
; SSE2-LABEL: 'cost_bswap_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i32'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i32'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i32'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i32'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I32 = call i32 @llvm.bswap.i32(i32 %a32)
define void @cost_bswap_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
; SSE2-LABEL: 'cost_bswap_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I16 = call i16 @llvm.bswap.i16(i16 %a16)
define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
; SSE2-LABEL: 'cost_bswap_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i64'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i64'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i64'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I64 = call i64 @llvm.bswap.i64(i64 %a64)
define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
; SSE2-LABEL: 'cost_bswap_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i32'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i32'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i32'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i32'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I32 = call i32 @llvm.bswap.i32(i32 %a32)
define void @cost_bswap_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
; SSE2-LABEL: 'cost_bswap_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%I16 = call i16 @llvm.bswap.i16(i16 %a16)
define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
; SSE2-LABEL: 'cost_bswap_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i64'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i64'
define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
; SSE2-LABEL: 'cost_bswap_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i32'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i32'
define void @cost_bswap_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
; SSE2-LABEL: 'cost_bswap_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i16'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i16'
define void @test_scalarized_intrinsics() {
%r1 = add %i8 undef, undef
-; CHECK32: cost of 1 {{.*}}bswap.v4i32
-; CHECK64: cost of 1 {{.*}}bswap.v4i32
+; CHECK32: cost of 2 {{.*}}bswap.v4i32
+; CHECK64: cost of 2 {{.*}}bswap.v4i32
%r2 = call %i4 @llvm.bswap.v4i32(%i4 undef)
-; CHECK32: cost of 1 {{.*}}bswap.v2i64
-; CHECK64: cost of 1 {{.*}}bswap.v2i64
+; CHECK32: cost of 2 {{.*}}bswap.v2i64
+; CHECK64: cost of 2 {{.*}}bswap.v2i64
%r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)
; CHECK32: cost of 11 {{.*}}cttz.v4i32