From d2c093e79d146f837ce0117f9b331e9e4508d2cf Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Tue, 7 Dec 2021 10:15:38 +0800 Subject: [PATCH] [CostModel][X86] Add i64 mul cost for avx512 as 1cy The i64 mul cost is 1cy for all CPUs that support AVX512. Currently all X86 CPUs use the i64 mul cost from the X64 cost table, which is not accurate for CPUs that support AVX512 (skx, icx). Reviewed By: pengfei, RKSimon Differential Revision: https://reviews.llvm.org/D115016 --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 1 + llvm/test/Analysis/CostModel/X86/arith-fix.ll | 60 ++-- llvm/test/Analysis/CostModel/X86/arith-overflow.ll | 36 +-- llvm/test/Analysis/CostModel/X86/arith.ll | 6 +- llvm/test/Analysis/CostModel/X86/mul.ll | 8 +- llvm/test/Analysis/CostModel/X86/rem.ll | 306 +++++++++++++-------- 6 files changed, 247 insertions(+), 170 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 869762b..c20241c 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -660,6 +660,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::MUL, MVT::v8i32, 1 }, // pmulld (Skylake from agner.org) { ISD::MUL, MVT::v4i32, 1 }, // pmulld (Skylake from agner.org) { ISD::MUL, MVT::v8i64, 6 }, // 3*pmuludq/3*shift/2*add + { ISD::MUL, MVT::i64, 1 }, // Skylake from http://www.agner.org/ { ISD::FNEG, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/ { ISD::FADD, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/ diff --git a/llvm/test/Analysis/CostModel/X86/arith-fix.ll b/llvm/test/Analysis/CostModel/X86/arith-fix.ll index 708ea0f..8a7552a 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fix.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fix.ll @@ -109,11 +109,11 @@ define i32 @smul(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'smul' -; AVX512F-NEXT: Cost Model: Found an estimated cost 
of 11 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -128,11 +128,11 @@ define i32 
@smul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'smul' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -147,11 +147,11 @@ define i32 @smul(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'smul' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x 
i32> undef, <4 x i32> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -343,11 +343,11 @@ define i32 @umul(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'umul' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = 
call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -362,11 +362,11 @@ define i32 @umul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'umul' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated 
cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -381,11 +381,11 @@ define i32 @umul(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'umul' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; 
AVX512DQ-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index 7a8b7f1..7d5a876 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1066,9 +1066,9 @@ define i32 @smul(i32 %arg) { ; ; AVX512F-LABEL: 'smul' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost 
Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1085,9 +1085,9 @@ define i32 @smul(i32 %arg) { ; ; AVX512BW-LABEL: 'smul' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: 
Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1104,9 +1104,9 @@ define i32 @smul(i32 %arg) { ; ; AVX512DQ-LABEL: 'smul' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: 
Found an estimated cost of 94 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1304,9 +1304,9 @@ define i32 @umul(i32 %arg) { ; ; AVX512F-LABEL: 'umul' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1323,9 +1323,9 @@ define i32 @umul(i32 %arg) { ; ; AVX512BW-LABEL: 'umul' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for 
instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1342,9 +1342,9 @@ define i32 @umul(i32 %arg) { ; ; AVX512DQ-LABEL: 'umul' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith.ll b/llvm/test/Analysis/CostModel/X86/arith.ll index eb7dbd0..2f1eaa7 100644 --- a/llvm/test/Analysis/CostModel/X86/arith.ll +++ b/llvm/test/Analysis/CostModel/X86/arith.ll @@ -1018,7 +1018,7 @@ define i32 @mul(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, undef +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = mul <2 x i64> undef, undef ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = mul <4 x i64> undef, undef ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = mul <8 x i64> undef, undef @@ -1040,7 +1040,7 @@ define i32 @mul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, undef +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = mul <2 x i64> undef, undef ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = mul <4 x i64> undef, undef ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = mul <8 x i64> undef, undef @@ -1062,7 +1062,7 @@ define i32 @mul(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'mul' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 
undef, undef +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = mul <2 x i64> undef, undef ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = mul <4 x i64> undef, undef ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = mul <8 x i64> undef, undef diff --git a/llvm/test/Analysis/CostModel/X86/mul.ll b/llvm/test/Analysis/CostModel/X86/mul.ll index 511767b..59c0d32 100644 --- a/llvm/test/Analysis/CostModel/X86/mul.ll +++ b/llvm/test/Analysis/CostModel/X86/mul.ll @@ -460,7 +460,7 @@ define i32 @mul_constnegpow2() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_constnegpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, @@ -479,7 +479,7 @@ define i32 @mul_constnegpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_constnegpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated 
cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, @@ -636,7 +636,7 @@ define i32 @mul_uniformconstnegpow2() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstnegpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, @@ -655,7 +655,7 @@ define i32 @mul_uniformconstnegpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstnegpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, diff --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll index c72cb9c..991430f 100644 --- a/llvm/test/Analysis/CostModel/X86/rem.ll +++ b/llvm/test/Analysis/CostModel/X86/rem.ll @@ -1,35 +1,73 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze 
-mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW -; -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SLM -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,GLM -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model 
-analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,BTVER2 define i32 @srem() { -; CHECK-LABEL: 'srem' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = srem <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = srem <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = srem <16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = srem <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = srem <16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = srem <32 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 960 for 
instruction: %V16i8 = srem <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = srem <32 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = srem <64 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'srem' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = srem <4 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = srem <8 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = srem <16 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = srem <8 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = srem <16 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = srem <32 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = srem <16 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = srem <32 x i8> undef, undef 
+; SSE-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = srem <64 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'srem' +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = srem <4 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = srem <8 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = srem <16 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = srem <8 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = srem <16 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = srem <32 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = srem <16 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = srem <32 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = srem <64 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'srem' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = srem <4 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = srem <8 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = srem <16 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = srem <8 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = srem <16 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = srem <32 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = srem <16 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = srem <32 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = srem <64 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, undef %V2i64 = srem <2 x i64> undef, undef @@ 
-55,24 +93,62 @@ define i32 @srem() { } define i32 @urem() { -; CHECK-LABEL: 'urem' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = urem <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = urem <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = urem <16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = urem <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = urem <16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = urem <32 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = urem <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = urem <32 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = urem <64 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'urem' +; SSE-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %I64 = urem i64 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = urem <4 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = urem <8 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = urem <16 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = urem <8 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = urem <16 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = urem <32 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = urem <16 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = urem <32 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = urem <64 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'urem' +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, undef +; 
AVX-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = urem <4 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = urem <8 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = urem <16 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = urem <8 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = urem <16 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = urem <32 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = urem <16 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = urem <32 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = urem <64 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'urem' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = urem <4 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = urem <8 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = urem <16 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = urem <8 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = urem <16 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = urem <32 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = urem <16 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = urem <32 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = urem <64 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, undef %V2i64 = urem <2 x i64> undef, undef @@ -194,10 +270,10 @@ define i32 @srem_const() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_const' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 
for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, @@ -213,10 +289,10 @@ define i32 @srem_const() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_const' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = 
srem i32 undef, 7 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, @@ -370,10 +446,10 @@ define i32 @urem_const() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'urem_const' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, @@ -389,10 +465,10 @@ define i32 @urem_const() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'urem_const' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 
for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, @@ -508,10 +584,10 @@ define i32 @srem_uniformconst() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconst' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %I32 = srem i32 undef, 7 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, @@ -527,10 +603,10 @@ define i32 @srem_uniformconst() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconst' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, @@ -646,10 +722,10 @@ define i32 @urem_uniformconst() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'urem_uniformconst' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, @@ -665,10 +741,10 @@ define i32 @urem_uniformconst() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'urem_uniformconst' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, @@ -822,7 +898,7 @@ define i32 @srem_constpow2() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_constpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, @@ -841,7 +917,7 @@ define i32 @srem_constpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_constpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, @@ -1117,7 +1193,7 @@ define i32 @srem_uniformconstpow2() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, @@ -1136,7 +1212,7 @@ define i32 @srem_uniformconstpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, @@ -1412,10 +1488,10 @@ define i32 @srem_constnegpow2() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_constnegpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated 
cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1431,10 +1507,10 @@ define i32 @srem_constnegpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_constnegpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1588,10 +1664,10 @@ define i32 @urem_constnegpow2() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; 
AVX512F-LABEL: 'urem_constnegpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, @@ -1607,10 +1683,10 @@ define i32 @urem_constnegpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'urem_constnegpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: 
%V2i64 = urem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, @@ -1726,10 +1802,10 @@ define i32 @srem_uniformconstnegpow2() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstnegpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1745,10 +1821,10 @@ define i32 @srem_uniformconstnegpow2() { ; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstnegpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1864,10 +1940,10 @@ define i32 @urem_uniformconstnegpow2() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'urem_uniformconstnegpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %I64 = urem i64 undef, -16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, @@ -1883,10 +1959,10 @@ define i32 @urem_uniformconstnegpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'urem_uniformconstnegpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 
7 for instruction: %V8i32 = urem <8 x i32> undef, -- 2.7.4