From dc113dc7ed08e28c241dcf3b85055a75dff53ffd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 7 Jul 2018 16:53:30 +0000 Subject: [PATCH] [CostModel][X86] Add SREM/UREM general and constant costs (PR38056) We penalize general SDIV/UDIV costs but don't do the same for SREM/UREM. This patch makes general vector SREM/UREM x20 as costly as scalar, the same approach as we do for SDIV/UDIV. The patch also extends the existing SDIV/UDIV constant costs for SREM/UREM - at the moment this means the additional cost of a MUL+SUB (see D48975). Differential Revision: https://reviews.llvm.org/D48980 llvm-svn: 336486 --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 34 +- llvm/test/Analysis/CostModel/X86/rem.ll | 964 +++++++++++++++++++------ 2 files changed, 779 insertions(+), 219 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 8132e77..bae2ef8 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -244,11 +244,12 @@ int X86TTIImpl::getArithmeticInstrCost( } } - if ((ISD == ISD::SDIV || ISD == ISD::UDIV || ISD == ISD::UREM) && + if ((ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV || + ISD == ISD::UREM) && (Op2Info == TargetTransformInfo::OK_UniformConstantValue || Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) && Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) { - if (ISD == ISD::SDIV) { + if (ISD == ISD::SDIV || ISD == ISD::SREM) { // On X86, vector signed division by constants power-of-two are // normally expanded to the sequence SRA + SRL + ADD + SRA. // The OperandValue properties may not be the same as that of the previous @@ -264,6 +265,12 @@ int X86TTIImpl::getArithmeticInstrCost( TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); + if (ISD == ISD::SREM) { + // For SREM: (X % C) is the equivalent of (X - (X/C)*C) + Cost += getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info); + Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Op1Info, Op2Info); + } + return Cost; } @@ -285,7 +292,9 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v64i8, 4 }, // psrlw, pand, pxor, psubb. { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence + { ISD::SREM, MVT::v32i16, 8 }, // vpmulhw+mul+sub sequence { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence + { ISD::UREM, MVT::v32i16, 8 }, // vpmulhuw+mul+sub sequence }; if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && @@ -301,7 +310,9 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v8i64, 1 }, { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence + { ISD::SREM, MVT::v16i32, 17 }, // vpmuldq+mul+sub sequence { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence + { ISD::UREM, MVT::v16i32, 17 }, // vpmuludq+mul+sub sequence }; if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && @@ -319,9 +330,13 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle. { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence + { ISD::SREM, MVT::v16i16, 8 }, // vpmulhw+mul+sub sequence { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence + { ISD::UREM, MVT::v16i16, 8 }, // vpmulhuw+mul+sub sequence { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence + { ISD::SREM, MVT::v8i32, 19 }, // vpmuldq+mul+sub sequence { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence + { ISD::UREM, MVT::v8i32, 19 }, // vpmuludq+mul+sub sequence }; if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && @@ -341,13 +356,21 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v32i8, 8+2 }, // 2*(psrlw, pand, pxor, psubb) + split. { ISD::SDIV, MVT::v16i16, 12+2 }, // 2*pmulhw sequence + split. + { ISD::SREM, MVT::v16i16, 16+2 }, // 2*pmulhw+mul+sub sequence + split. { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence + { ISD::SREM, MVT::v8i16, 8 }, // pmulhw+mul+sub sequence { ISD::UDIV, MVT::v16i16, 12+2 }, // 2*pmulhuw sequence + split. + { ISD::UREM, MVT::v16i16, 16+2 }, // 2*pmulhuw+mul+sub sequence + split. { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence + { ISD::UREM, MVT::v8i16, 8 }, // pmulhuw+mul+sub sequence { ISD::SDIV, MVT::v8i32, 38+2 }, // 2*pmuludq sequence + split. + { ISD::SREM, MVT::v8i32, 48+2 }, // 2*pmuludq+mul+sub sequence + split. { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence + { ISD::SREM, MVT::v4i32, 24 }, // pmuludq+mul+sub sequence { ISD::UDIV, MVT::v8i32, 30+2 }, // 2*pmuludq sequence + split. + { ISD::UREM, MVT::v8i32, 40+2 }, // 2*pmuludq+mul+sub sequence + split. { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence + { ISD::UREM, MVT::v4i32, 20 }, // pmuludq+mul+sub sequence }; if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && @@ -355,8 +378,12 @@ int X86TTIImpl::getArithmeticInstrCost( // pmuldq sequence. if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX()) return LT.first * 32; + if (ISD == ISD::SREM && LT.second == MVT::v8i32 && ST->hasAVX()) + return LT.first * 38; if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41()) return LT.first * 15; + if (ISD == ISD::SREM && LT.second == MVT::v4i32 && ST->hasSSE41()) + return LT.first * 20; // XOP has faster vXi8 shifts. if ((ISD != ISD::SHL && ISD != ISD::SRL && ISD != ISD::SRA) || @@ -765,7 +792,8 @@ int X86TTIImpl::getArithmeticInstrCost( // anyways so try hard to prevent vectorization of division - it is // generally a bad idea. Assume somewhat arbitrarily that we have to be able // to hide "20 cycles" for each lane. - if ((ISD == ISD::SDIV || ISD == ISD::UDIV) && LT.second.isVector()) { + if (LT.second.isVector() && (ISD == ISD::SDIV || ISD == ISD::SREM || + ISD == ISD::UDIV || ISD == ISD::UREM)) { int ScalarCost = getArithmeticInstrCost( Opcode, Ty->getScalarType(), Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); diff --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll index 8189e20..f1165e7 100644 --- a/llvm/test/Analysis/CostModel/X86/rem.ll +++ b/llvm/test/Analysis/CostModel/X86/rem.ll @@ -7,47 +7,47 @@ ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SLM +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,GLM ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 define i32 @srem() { ; CHECK-LABEL: 'srem' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = srem <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = srem <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = srem <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = srem <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = srem <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = srem <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = srem <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = srem <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = srem <16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = srem <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = srem <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = srem <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = srem <32 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = srem <32 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = srem <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = srem <2 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = srem <4 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = srem <8 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = srem <16 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = srem <4 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = srem <8 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = srem <16 x i32> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = srem <8 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = srem <16 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = srem <32 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = srem <8 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = srem <16 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = srem <32 x i16> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = srem <32 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = srem <64 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, undef @@ -76,40 +76,40 @@ define i32 @srem() { define i32 @urem() { ; CHECK-LABEL: 'urem' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = urem <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = urem <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = urem <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = urem <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = urem <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = urem <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = urem <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = urem <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = urem <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = urem <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = urem <16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = urem <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = urem <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = urem <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = urem <32 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = urem <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = urem <32 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = urem <64 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'urem' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = urem <2 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = urem <4 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = urem <8 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = urem <4 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = urem <8 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = urem <16 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = urem <4 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = urem <8 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = urem <16 x i32> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = urem <8 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = urem <16 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = urem <32 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = urem <8 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = urem <16 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = urem <32 x i16> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = urem <16 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = urem <32 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = urem <64 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, undef @@ -138,40 +138,40 @@ define i32 @urem() { define i32 @srem_const() { ; CHECK-LABEL: 'srem_const' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = srem <4 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = srem <8 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = srem <16 x i32> undef, ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = srem <8 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = srem <16 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = srem <32 x i16> undef, ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_const' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = srem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = srem <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = srem <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = srem <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = srem <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = srem <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = srem <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = srem <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = srem <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = srem <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = srem <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = srem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = srem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -200,40 +200,40 @@ define i32 @srem_const() { define i32 @urem_const() { ; CHECK-LABEL: 'urem_const' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = urem <4 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = urem <8 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = urem <16 x i32> undef, ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = urem <8 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = urem <16 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = urem <32 x i16> undef, ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'urem_const' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = urem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = urem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = urem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = urem <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = urem <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = urem <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = urem <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = urem <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = urem <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = urem <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = urem <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = urem <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = urem <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = urem <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = urem <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = urem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = urem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = urem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -260,42 +260,194 @@ define i32 @urem_const() { } define i32 @srem_uniformconst() { -; CHECK-LABEL: 'srem_uniformconst' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = srem <8 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = srem <16 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = srem <32 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = srem <64 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'srem_uniformconst' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'srem_uniformconst' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE42-LABEL: 'srem_uniformconst' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = srem <8 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX1-LABEL: 'srem_uniformconst' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'srem_uniformconst' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512F-LABEL: 'srem_uniformconst' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512BW-LABEL: 'srem_uniformconst' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'srem_uniformconst' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = srem <8 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'srem_uniformconst' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = srem <8 x i32> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = srem <16 x i32> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_uniformconst' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = srem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = srem <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i32 = srem <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16i32 = srem <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = srem <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = srem <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = srem <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = srem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = srem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -322,42 +474,118 @@ define i32 @srem_uniformconst() { } define i32 @urem_uniformconst() { -; CHECK-LABEL: 'urem_uniformconst' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = urem <8 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = urem <16 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = urem <32 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = urem <64 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'urem_uniformconst' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 7 +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7 +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = urem <8 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX1-LABEL: 'urem_uniformconst' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 7 +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7 +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i32 = urem <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 +; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'urem_uniformconst' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 7 +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7 +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 +; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512F-LABEL: 'urem_uniformconst' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512BW-LABEL: 'urem_uniformconst' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'urem_uniformconst' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = urem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = urem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = urem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = urem <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = urem <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = urem <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i32 = urem <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16i32 = urem <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = urem <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = urem <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = urem <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = urem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = urem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = urem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -384,42 +612,194 @@ define i32 @urem_uniformconst() { } define i32 @srem_constpow2() { -; CHECK-LABEL: 'srem_constpow2' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = srem <8 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = srem <16 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = srem <32 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = srem <64 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'srem_constpow2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i32 = srem <4 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8i32 = srem <8 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V8i16 = srem <8 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V16i16 = srem <16 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 592 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'srem_constpow2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2i64 = srem <2 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V4i64 = srem <4 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V8i64 = srem <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i32 = srem <4 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8i32 = srem <8 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16i32 = srem <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V8i16 = srem <8 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V16i16 = srem <16 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V32i16 = srem <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16i8 = srem <16 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %V32i8 = srem <32 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 592 for instruction: %V64i8 = srem <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE42-LABEL: 'srem_constpow2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; SSE42-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4i32 = srem <4 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8i32 = srem <8 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8i16 = srem <8 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16i16 = srem <16 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; SSE42-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX1-LABEL: 'srem_constpow2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'srem_constpow2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512F-LABEL: 'srem_constpow2' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512BW-LABEL: 'srem_constpow2' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'srem_constpow2' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V2i64 = srem <2 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V4i64 = srem <4 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i32 = srem <4 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8i32 = srem <8 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8i16 = srem <8 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16i16 = srem <16 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16i8 = srem <16 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V32i8 = srem <32 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'srem_constpow2' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2i64 = srem <2 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V4i64 = srem <4 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V8i64 = srem <8 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; GLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4i32 = srem <4 x i32> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8i32 = srem <8 x i32> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V16i32 = srem <16 x i32> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; GLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8i16 = srem <8 x i16> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16i16 = srem <16 x i16> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V32i16 = srem <32 x i16> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; GLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16i8 = srem <16 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32i8 = srem <32 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %V64i8 = srem <64 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_constpow2' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = srem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V8i64 = srem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = srem <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4i32 = srem <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = srem <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = srem <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = srem <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8i16 = srem <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = srem <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = srem <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = srem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = srem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16i8 = srem <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = srem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = srem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -565,42 +945,194 @@ define i32 @urem_constpow2() { } define i32 @srem_uniformconstpow2() { -; CHECK-LABEL: 'srem_uniformconstpow2' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = srem <8 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = srem <16 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = srem <32 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = srem <64 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'srem_uniformconstpow2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'srem_uniformconstpow2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4i64 = srem <4 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8i64 = srem <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i8 = srem <32 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64i8 = srem <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE42-LABEL: 'srem_uniformconstpow2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX1-LABEL: 'srem_uniformconstpow2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'srem_uniformconstpow2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512F-LABEL: 'srem_uniformconstpow2' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512BW-LABEL: 'srem_uniformconstpow2' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'srem_uniformconstpow2' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2i64 = srem <2 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4i64 = srem <4 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = srem <8 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = srem <16 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32i8 = srem <32 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'srem_uniformconstpow2' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 +; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4i64 = srem <4 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8i64 = srem <8 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i8 = srem <32 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64i8 = srem <64 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_uniformconstpow2' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = srem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = srem <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = srem <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = srem <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = srem <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i16 = srem <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = srem <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i8 = srem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64i8 = srem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32i8 = srem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V64i8 = srem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 -- 2.7.4